root / lib / cmdlib.py @ 57de31c0
#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

# States of instance
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs


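# Illustrative sketch (hypothetical arguments): an LU's Exec method can hand
# follow-up work back to the job queue by returning a ResultWithJobs, e.g. the
# way LUClusterVerify below submits per-group verification jobs:
#
#   def Exec(self, feedback_fn):
#     jobs = [[opcodes.OpClusterVerifyConfig(ignore_errors=[])]]
#     return ResultWithJobs(jobs, some_extra_key="kept in the 'other' dict")
#
# mcpu.Processor._ProcessResult picks up the C{jobs} attribute, submits the
# opcodes and includes the resulting job IDs in the opcode result.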
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the "unused argument" and "could
    # be a function" warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy


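# Illustrative example for _GetUpdatedParams above (hypothetical values):
# updating {"a": 1, "b": 2} with {"a": constants.VALUE_DEFAULT, "c": 3} and
# use_default=True yields {"b": 2, "c": 3} -- the "default" marker removes the
# key so the cluster-level default applies again, while new keys are merged in:
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   => {"b": 2, "c": 3}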
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"


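# Illustrative usage sketch for _ReleaseLocks above (hypothetical names): an LU
# can drop node locks it no longer needs while retaining a chosen subset, or
# release an explicit list instead -- but never both at once:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#   _ReleaseLocks(self, locking.LEVEL_NODE, names=unneeded_node_names)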
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


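# Illustrative example for _MapInstanceDisksToNodes above (volume names are
# made up; they come from Instance.MapLVsByNode): for an instance
# "inst1.example.com" whose logical volumes live on "node1.example.com", the
# returned mapping has the shape
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node1.example.com", "xenvg/disk1"): "inst1.example.com"}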
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param req_states: the admin states the instance is required to be in
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: string
  @param status: the desired status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


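# Illustrative example (hypothetical instance): for an instance with a single
# bridged NIC and one disk, the environment built by _BuildInstanceHookEnv
# above contains keys such as
#
#   INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT,
#   INSTANCE_NIC0_MAC, INSTANCE_NIC0_MODE, INSTANCE_NIC0_LINK,
#   INSTANCE_NIC0_BRIDGE, INSTANCE_DISK_COUNT, INSTANCE_DISK0_SIZE,
#   INSTANCE_DISK0_MODE, INSTANCE_BE_*, INSTANCE_HV_*
#
# and the hooks runner prefixes each key with "GANETI_" before exporting it to
# the hook scripts (see LogicalUnit.BuildHooksEnv above).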
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond


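# Illustrative example for the _VerifyErrors mix-in above (the error-code
# tuple is assumed to be (item-type, error-text, description), e.g.
# constants.CV_ECLUSTERCFG): _Error emits either the machine-parseable form
#
#   ERROR:ECLUSTERCFG:cluster:<item>:<message>
#
# when the opcode requests error_codes, or the human-readable form
#
#   ERROR: cluster <item>: <message>
#
# otherwise; _ErrorIf additionally demotes codes listed in
# self.op.ignore_errors to warnings.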
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)

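# A minimal sketch (hypothetical group names) of the job list built by
# LUClusterVerify.Exec for two node groups; the relative dependency
# -len(jobs) makes every group job wait for the initial config job:
#
#   jobs = [
#     [OpClusterVerifyConfig(...)],
#     [OpClusterVerifyGroup(group_name="group1", depends=[(-1, [])], ...)],
#     [OpClusterVerifyGroup(group_name="group2", depends=[(-2, [])], ...)],
#     ]

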
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    # dangling_nodes contains node names, so use them directly here
    pretty_dangling = [
        "%s (%s)" %
        (node,
         utils.CommaJoin(dangling_instances.get(node,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad

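# A rough sketch (hypothetical names) of the feedback produced by
# LUClusterVerifyConfig.Exec when a node still references a removed group:
#
#   * Verifying all nodes belong to an existing group
#     - ERROR: cluster: the following nodes (and their instances) belong
#       to a non existing group: node3.example.com (inst5.example.com)

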
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1653
  """Verifies the status of a node group.
1654

1655
  """
1656
  HPATH = "cluster-verify"
1657
  HTYPE = constants.HTYPE_CLUSTER
1658
  REQ_BGL = False
1659

    
1660
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1661

    
1662
  class NodeImage(object):
1663
    """A class representing the logical and physical status of a node.
1664

1665
    @type name: string
1666
    @ivar name: the node name to which this object refers
1667
    @ivar volumes: a structure as returned from
1668
        L{ganeti.backend.GetVolumeList} (runtime)
1669
    @ivar instances: a list of running instances (runtime)
1670
    @ivar pinst: list of configured primary instances (config)
1671
    @ivar sinst: list of configured secondary instances (config)
1672
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1673
        instances for which this node is secondary (config)
1674
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1675
    @ivar dfree: free disk, as reported by the node (runtime)
1676
    @ivar offline: the offline status (config)
1677
    @type rpc_fail: boolean
1678
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1679
        not whether the individual keys were correct) (runtime)
1680
    @type lvm_fail: boolean
1681
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1682
    @type hyp_fail: boolean
1683
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1684
    @type ghost: boolean
1685
    @ivar ghost: whether this is a known node or not (config)
1686
    @type os_fail: boolean
1687
    @ivar os_fail: whether the RPC call didn't return valid OS data
1688
    @type oslist: list
1689
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1690
    @type vm_capable: boolean
1691
    @ivar vm_capable: whether the node can host instances
1692

1693
    """
1694
    def __init__(self, offline=False, name=None, vm_capable=True):
1695
      self.name = name
1696
      self.volumes = {}
1697
      self.instances = []
1698
      self.pinst = []
1699
      self.sinst = []
1700
      self.sbp = {}
1701
      self.mfree = 0
1702
      self.dfree = 0
1703
      self.offline = offline
1704
      self.vm_capable = vm_capable
1705
      self.rpc_fail = False
1706
      self.lvm_fail = False
1707
      self.hyp_fail = False
1708
      self.ghost = False
1709
      self.os_fail = False
1710
      self.oslist = {}
1711

    
1712
  def ExpandNames(self):
1713
    # This raises errors.OpPrereqError on its own:
1714
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1715

    
1716
    # Get instances in node group; this is unsafe and needs verification later
1717
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1718

    
1719
    self.needed_locks = {
1720
      locking.LEVEL_INSTANCE: inst_names,
1721
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1722
      locking.LEVEL_NODE: [],
1723
      }
1724

    
1725
    self.share_locks = _ShareAll()
1726

    
1727
  def DeclareLocks(self, level):
1728
    if level == locking.LEVEL_NODE:
1729
      # Get members of node group; this is unsafe and needs verification later
1730
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1731

    
1732
      all_inst_info = self.cfg.GetAllInstancesInfo()
1733

    
1734
      # In Exec(), we warn about mirrored instances that have primary and
1735
      # secondary living in separate node groups. To fully verify that
1736
      # volumes for these instances are healthy, we will need to do an
1737
      # extra call to their secondaries. We ensure here those nodes will
1738
      # be locked.
1739
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1740
        # Important: access only the instances whose lock is owned
1741
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1742
          nodes.update(all_inst_info[inst].secondary_nodes)
1743

    
1744
      self.needed_locks[locking.LEVEL_NODE] = nodes
1745

    
1746
  def CheckPrereq(self):
1747
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1748
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1749

    
1750
    group_nodes = set(self.group_info.members)
1751
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1752

    
1753
    unlocked_nodes = \
1754
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1755

    
1756
    unlocked_instances = \
1757
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1758

    
1759
    if unlocked_nodes:
1760
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1761
                                 utils.CommaJoin(unlocked_nodes))
1762

    
1763
    if unlocked_instances:
1764
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1765
                                 utils.CommaJoin(unlocked_instances))
1766

    
1767
    self.all_node_info = self.cfg.GetAllNodesInfo()
1768
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1769

    
1770
    self.my_node_names = utils.NiceSort(group_nodes)
1771
    self.my_inst_names = utils.NiceSort(group_instances)
1772

    
1773
    self.my_node_info = dict((name, self.all_node_info[name])
1774
                             for name in self.my_node_names)
1775

    
1776
    self.my_inst_info = dict((name, self.all_inst_info[name])
1777
                             for name in self.my_inst_names)
1778

    
1779
    # We detect here the nodes that will need the extra RPC calls for verifying
1780
    # split LV volumes; they should be locked.
1781
    extra_lv_nodes = set()
1782

    
1783
    for inst in self.my_inst_info.values():
1784
      if inst.disk_template in constants.DTS_INT_MIRROR:
1785
        group = self.my_node_info[inst.primary_node].group
1786
        for nname in inst.secondary_nodes:
1787
          if self.all_node_info[nname].group != group:
1788
            extra_lv_nodes.add(nname)
1789

    
1790
    unlocked_lv_nodes = \
1791
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1792

    
1793
    if unlocked_lv_nodes:
1794
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1795
                                 utils.CommaJoin(unlocked_lv_nodes))
1796
    self.extra_lv_nodes = list(extra_lv_nodes)
1797

    
1798
  def _VerifyNode(self, ninfo, nresult):
1799
    """Perform some basic validation on data returned from a node.
1800

1801
      - check the result data structure is well formed and has all the
1802
        mandatory fields
1803
      - check ganeti version
1804

1805
    @type ninfo: L{objects.Node}
1806
    @param ninfo: the node to check
1807
    @param nresult: the results from the node
1808
    @rtype: boolean
1809
    @return: whether overall this call was successful (and we can expect
1810
         reasonable values in the response)
1811

1812
    """
1813
    node = ninfo.name
1814
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1815

    
1816
    # main result, nresult should be a non-empty dict
1817
    test = not nresult or not isinstance(nresult, dict)
1818
    _ErrorIf(test, constants.CV_ENODERPC, node,
1819
                  "unable to verify node: no data returned")
1820
    if test:
1821
      return False
1822

    
1823
    # compares ganeti version
1824
    local_version = constants.PROTOCOL_VERSION
1825
    remote_version = nresult.get("version", None)
1826
    test = not (remote_version and
1827
                isinstance(remote_version, (list, tuple)) and
1828
                len(remote_version) == 2)
1829
    _ErrorIf(test, constants.CV_ENODERPC, node,
1830
             "connection to node returned invalid data")
1831
    if test:
1832
      return False
1833

    
1834
    test = local_version != remote_version[0]
1835
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1836
             "incompatible protocol versions: master %s,"
1837
             " node %s", local_version, remote_version[0])
1838
    if test:
1839
      return False
1840

    
1841
    # node seems compatible, we can actually try to look into its results
1842

    
1843
    # full package version
1844
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1845
                  constants.CV_ENODEVERSION, node,
1846
                  "software version mismatch: master %s, node %s",
1847
                  constants.RELEASE_VERSION, remote_version[1],
1848
                  code=self.ETYPE_WARNING)
1849

    
1850
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1851
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1852
      for hv_name, hv_result in hyp_result.iteritems():
1853
        test = hv_result is not None
1854
        _ErrorIf(test, constants.CV_ENODEHV, node,
1855
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1856

    
1857
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1858
    if ninfo.vm_capable and isinstance(hvp_result, list):
1859
      for item, hv_name, hv_result in hvp_result:
1860
        _ErrorIf(True, constants.CV_ENODEHV, node,
1861
                 "hypervisor %s parameter verify failure (source %s): %s",
1862
                 hv_name, item, hv_result)
1863

    
1864
    test = nresult.get(constants.NV_NODESETUP,
1865
                       ["Missing NODESETUP results"])
1866
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1867
             "; ".join(test))
1868

    
1869
    return True
1870

    
1871
  def _VerifyNodeTime(self, ninfo, nresult,
1872
                      nvinfo_starttime, nvinfo_endtime):
1873
    """Check the node time.
1874

1875
    @type ninfo: L{objects.Node}
1876
    @param ninfo: the node to check
1877
    @param nresult: the remote results for the node
1878
    @param nvinfo_starttime: the start time of the RPC call
1879
    @param nvinfo_endtime: the end time of the RPC call
1880

1881
    """
1882
    node = ninfo.name
1883
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1884

    
1885
    ntime = nresult.get(constants.NV_TIME, None)
1886
    try:
1887
      ntime_merged = utils.MergeTime(ntime)
1888
    except (ValueError, TypeError):
1889
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1890
      return
1891

    
1892
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1893
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1894
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1895
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1896
    else:
1897
      ntime_diff = None
1898

    
1899
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1900
             "Node time diverges by at least %s from master node time",
1901
             ntime_diff)
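
  # A worked example (hypothetical numbers) of the skew window above,
  # assuming constants.NODE_MAX_CLOCK_SKEW is 150 seconds:
  #
  #   nvinfo_starttime = 1000.0, nvinfo_endtime = 1005.0
  #   ntime_merged = 840.0  -> below 850.0, reported as a "160.0s" divergence
  #   ntime_merged = 1100.0 -> inside [850.0, 1155.0], no error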
1902

    
1903
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1904
    """Check the node LVM results.
1905

1906
    @type ninfo: L{objects.Node}
1907
    @param ninfo: the node to check
1908
    @param nresult: the remote results for the node
1909
    @param vg_name: the configured VG name
1910

1911
    """
1912
    if vg_name is None:
1913
      return
1914

    
1915
    node = ninfo.name
1916
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1917

    
1918
    # checks vg existence and size > 20G
1919
    vglist = nresult.get(constants.NV_VGLIST, None)
1920
    test = not vglist
1921
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1922
    if not test:
1923
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1924
                                            constants.MIN_VG_SIZE)
1925
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1926

    
1927
    # check pv names
1928
    pvlist = nresult.get(constants.NV_PVLIST, None)
1929
    test = pvlist is None
1930
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1931
    if not test:
1932
      # check that ':' is not present in PV names, since it's a
1933
      # special character for lvcreate (denotes the range of PEs to
1934
      # use on the PV)
1935
      for _, pvname, owner_vg in pvlist:
1936
        test = ":" in pvname
1937
        _ErrorIf(test, constants.CV_ENODELVM, node,
1938
                 "Invalid character ':' in PV '%s' of VG '%s'",
1939
                 pvname, owner_vg)
1940

    
1941
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1942
    """Check the node bridges.
1943

1944
    @type ninfo: L{objects.Node}
1945
    @param ninfo: the node to check
1946
    @param nresult: the remote results for the node
1947
    @param bridges: the expected list of bridges
1948

1949
    """
1950
    if not bridges:
1951
      return
1952

    
1953
    node = ninfo.name
1954
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1955

    
1956
    missing = nresult.get(constants.NV_BRIDGES, None)
1957
    test = not isinstance(missing, list)
1958
    _ErrorIf(test, constants.CV_ENODENET, node,
1959
             "did not return valid bridge information")
1960
    if not test:
1961
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1962
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1963

    
1964
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1965
    """Check the results of user scripts presence and executability on the node
1966

1967
    @type ninfo: L{objects.Node}
1968
    @param ninfo: the node to check
1969
    @param nresult: the remote results for the node
1970

1971
    """
1972
    node = ninfo.name
1973

    
1974
    test = not constants.NV_USERSCRIPTS in nresult
1975
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1976
                  "did not return user scripts information")
1977

    
1978
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1979
    if not test:
1980
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1981
                    "user scripts not present or not executable: %s" %
1982
                    utils.CommaJoin(sorted(broken_scripts)))
1983

    
1984
  def _VerifyNodeNetwork(self, ninfo, nresult):
1985
    """Check the node network connectivity results.
1986

1987
    @type ninfo: L{objects.Node}
1988
    @param ninfo: the node to check
1989
    @param nresult: the remote results for the node
1990

1991
    """
1992
    node = ninfo.name
1993
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1994

    
1995
    test = constants.NV_NODELIST not in nresult
1996
    _ErrorIf(test, constants.CV_ENODESSH, node,
1997
             "node hasn't returned node ssh connectivity data")
1998
    if not test:
1999
      if nresult[constants.NV_NODELIST]:
2000
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2001
          _ErrorIf(True, constants.CV_ENODESSH, node,
2002
                   "ssh communication with node '%s': %s", a_node, a_msg)
2003

    
2004
    test = constants.NV_NODENETTEST not in nresult
2005
    _ErrorIf(test, constants.CV_ENODENET, node,
2006
             "node hasn't returned node tcp connectivity data")
2007
    if not test:
2008
      if nresult[constants.NV_NODENETTEST]:
2009
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2010
        for anode in nlist:
2011
          _ErrorIf(True, constants.CV_ENODENET, node,
2012
                   "tcp communication with node '%s': %s",
2013
                   anode, nresult[constants.NV_NODENETTEST][anode])
2014

    
2015
    test = constants.NV_MASTERIP not in nresult
2016
    _ErrorIf(test, constants.CV_ENODENET, node,
2017
             "node hasn't returned node master IP reachability data")
2018
    if not test:
2019
      if not nresult[constants.NV_MASTERIP]:
2020
        if node == self.master_node:
2021
          msg = "the master node cannot reach the master IP (not configured?)"
2022
        else:
2023
          msg = "cannot reach the master IP"
2024
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2025

    
2026
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2027
                      diskstatus):
2028
    """Verify an instance.
2029

2030
    This function checks to see if the required block devices are
2031
    available on the instance's node.
2032

2033
    """
2034
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2035
    node_current = instanceconfig.primary_node
2036

    
2037
    node_vol_should = {}
2038
    instanceconfig.MapLVsByNode(node_vol_should)
2039

    
2040
    for node in node_vol_should:
2041
      n_img = node_image[node]
2042
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2043
        # ignore missing volumes on offline or broken nodes
2044
        continue
2045
      for volume in node_vol_should[node]:
2046
        test = volume not in n_img.volumes
2047
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2048
                 "volume %s missing on node %s", volume, node)
2049

    
2050
    if instanceconfig.admin_state == constants.ADMINST_UP:
2051
      pri_img = node_image[node_current]
2052
      test = instance not in pri_img.instances and not pri_img.offline
2053
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2054
               "instance not running on its primary node %s",
2055
               node_current)
2056

    
2057
    diskdata = [(nname, success, status, idx)
2058
                for (nname, disks) in diskstatus.items()
2059
                for idx, (success, status) in enumerate(disks)]
2060

    
2061
    for nname, success, bdev_status, idx in diskdata:
2062
      # the 'ghost node' construction in Exec() ensures that we have a
2063
      # node here
2064
      snode = node_image[nname]
2065
      bad_snode = snode.ghost or snode.offline
2066
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2067
               not success and not bad_snode,
2068
               constants.CV_EINSTANCEFAULTYDISK, instance,
2069
               "couldn't retrieve status for disk/%s on %s: %s",
2070
               idx, nname, bdev_status)
2071
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2072
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2073
               constants.CV_EINSTANCEFAULTYDISK, instance,
2074
               "disk/%s on %s is faulty", idx, nname)
2075

    
2076
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2077
    """Verify if there are any unknown volumes in the cluster.
2078

2079
    The .os, .swap and backup volumes are ignored. All other volumes are
2080
    reported as unknown.
2081

2082
    @type reserved: L{ganeti.utils.FieldSet}
2083
    @param reserved: a FieldSet of reserved volume names
2084

2085
    """
2086
    for node, n_img in node_image.items():
2087
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2088
        # skip non-healthy nodes
2089
        continue
2090
      for volume in n_img.volumes:
2091
        test = ((node not in node_vol_should or
2092
                volume not in node_vol_should[node]) and
2093
                not reserved.Matches(volume))
2094
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2095
                      "volume %s is unknown", volume)
2096

    
2097
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2098
    """Verify N+1 Memory Resilience.
2099

2100
    Check that if one single node dies we can still start all the
2101
    instances it was primary for.
2102

2103
    """
2104
    cluster_info = self.cfg.GetClusterInfo()
2105
    for node, n_img in node_image.items():
2106
      # This code checks that every node which is now listed as
2107
      # secondary has enough memory to host all instances it is
2108
      # supposed to should a single other node in the cluster fail.
2109
      # FIXME: not ready for failover to an arbitrary node
2110
      # FIXME: does not support file-backed instances
2111
      # WARNING: we currently take into account down instances as well
2112
      # as up ones, considering that even if they're down someone
2113
      # might want to start them even in the event of a node failure.
2114
      if n_img.offline:
2115
        # we're skipping offline nodes from the N+1 warning, since
2116
        # most likely we don't have good memory information from them;
2117
        # we already list instances living on such nodes, and that's
2118
        # enough warning
2119
        continue
2120
      for prinode, instances in n_img.sbp.items():
2121
        needed_mem = 0
2122
        for instance in instances:
2123
          bep = cluster_info.FillBE(instance_cfg[instance])
2124
          if bep[constants.BE_AUTO_BALANCE]:
2125
            needed_mem += bep[constants.BE_MEMORY]
2126
        test = n_img.mfree < needed_mem
2127
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2128
                      "not enough memory to accomodate instance failovers"
2129
                      " should node %s fail (%dMiB needed, %dMiB available)",
2130
                      prinode, needed_mem, n_img.mfree)
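
  # A worked example (hypothetical numbers) of the N+1 check above: if this
  # node is secondary for two auto-balanced instances with primary "node1"
  # and BE_MEMORY of 2048 and 4096 MiB, needed_mem is 6144 MiB; with mfree
  # at 4096 MiB, CV_ENODEN1 is raised for this node, warning that it cannot
  # absorb a failure of "node1".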
2131

    
2132
  @classmethod
2133
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2134
                   (files_all, files_opt, files_mc, files_vm)):
2135
    """Verifies file checksums collected from all nodes.
2136

2137
    @param errorif: Callback for reporting errors
2138
    @param nodeinfo: List of L{objects.Node} objects
2139
    @param master_node: Name of master node
2140
    @param all_nvinfo: RPC results
2141

2142
    """
2143
    # Define functions determining which nodes to consider for a file
2144
    files2nodefn = [
2145
      (files_all, None),
2146
      (files_mc, lambda node: (node.master_candidate or
2147
                               node.name == master_node)),
2148
      (files_vm, lambda node: node.vm_capable),
2149
      ]
2150

    
2151
    # Build mapping from filename to list of nodes which should have the file
2152
    nodefiles = {}
2153
    for (files, fn) in files2nodefn:
2154
      if fn is None:
2155
        filenodes = nodeinfo
2156
      else:
2157
        filenodes = filter(fn, nodeinfo)
2158
      nodefiles.update((filename,
2159
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2160
                       for filename in files)
2161

    
2162
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2163

    
2164
    fileinfo = dict((filename, {}) for filename in nodefiles)
2165
    ignore_nodes = set()
2166

    
2167
    for node in nodeinfo:
2168
      if node.offline:
2169
        ignore_nodes.add(node.name)
2170
        continue
2171

    
2172
      nresult = all_nvinfo[node.name]
2173

    
2174
      if nresult.fail_msg or not nresult.payload:
2175
        node_files = None
2176
      else:
2177
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2178

    
2179
      test = not (node_files and isinstance(node_files, dict))
2180
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2181
              "Node did not return file checksum data")
2182
      if test:
2183
        ignore_nodes.add(node.name)
2184
        continue
2185

    
2186
      # Build per-checksum mapping from filename to nodes having it
2187
      for (filename, checksum) in node_files.items():
2188
        assert filename in nodefiles
2189
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2190

    
2191
    for (filename, checksums) in fileinfo.items():
2192
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2193

    
2194
      # Nodes having the file
2195
      with_file = frozenset(node_name
2196
                            for nodes in fileinfo[filename].values()
2197
                            for node_name in nodes) - ignore_nodes
2198

    
2199
      expected_nodes = nodefiles[filename] - ignore_nodes
2200

    
2201
      # Nodes missing file
2202
      missing_file = expected_nodes - with_file
2203

    
2204
      if filename in files_opt:
2205
        # All or no nodes
2206
        errorif(missing_file and missing_file != expected_nodes,
2207
                constants.CV_ECLUSTERFILECHECK, None,
2208
                "File %s is optional, but it must exist on all or no"
2209
                " nodes (not found on %s)",
2210
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2211
      else:
2212
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2213
                "File %s is missing from node(s) %s", filename,
2214
                utils.CommaJoin(utils.NiceSort(missing_file)))
2215

    
2216
        # Warn if a node has a file it shouldn't
2217
        unexpected = with_file - expected_nodes
2218
        errorif(unexpected,
2219
                constants.CV_ECLUSTERFILECHECK, None,
2220
                "File %s should not exist on node(s) %s",
2221
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2222

    
2223
      # See if there are multiple versions of the file
2224
      test = len(checksums) > 1
2225
      if test:
2226
        variants = ["variant %s on %s" %
2227
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2228
                    for (idx, (checksum, nodes)) in
2229
                      enumerate(sorted(checksums.items()))]
2230
      else:
2231
        variants = []
2232

    
2233
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2234
              "File %s found with %s different checksums (%s)",
2235
              filename, len(checksums), "; ".join(variants))
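
  # A minimal sketch (hypothetical checksums) of the per-file data examined
  # above; two checksums for the same file trigger the "different checksums"
  # CV_ECLUSTERFILECHECK error:
  #
  #   fileinfo["/etc/ganeti/some-file"] = {
  #     "0123...abcd": set(["node1", "node2"]),
  #     "4567...ef89": set(["node3"]),
  #     }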
2236

    
2237
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2238
                      drbd_map):
2239
    """Verifies and the node DRBD status.
2240

2241
    @type ninfo: L{objects.Node}
2242
    @param ninfo: the node to check
2243
    @param nresult: the remote results for the node
2244
    @param instanceinfo: the dict of instances
2245
    @param drbd_helper: the configured DRBD usermode helper
2246
    @param drbd_map: the DRBD map as returned by
2247
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2248

2249
    """
2250
    node = ninfo.name
2251
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2252

    
2253
    if drbd_helper:
2254
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2255
      test = (helper_result == None)
2256
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2257
               "no drbd usermode helper returned")
2258
      if helper_result:
2259
        status, payload = helper_result
2260
        test = not status
2261
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2262
                 "drbd usermode helper check unsuccessful: %s", payload)
2263
        test = status and (payload != drbd_helper)
2264
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2265
                 "wrong drbd usermode helper: %s", payload)
2266

    
2267
    # compute the DRBD minors
2268
    node_drbd = {}
2269
    for minor, instance in drbd_map[node].items():
2270
      test = instance not in instanceinfo
2271
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2272
               "ghost instance '%s' in temporary DRBD map", instance)
2273
        # ghost instance should not be running, but otherwise we
2274
        # don't give double warnings (both ghost instance and
2275
        # unallocated minor in use)
2276
      if test:
2277
        node_drbd[minor] = (instance, False)
2278
      else:
2279
        instance = instanceinfo[instance]
2280
        node_drbd[minor] = (instance.name,
2281
                            instance.admin_state == constants.ADMINST_UP)
2282

    
2283
    # and now check them
2284
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2285
    test = not isinstance(used_minors, (tuple, list))
2286
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2287
             "cannot parse drbd status file: %s", str(used_minors))
2288
    if test:
2289
      # we cannot check drbd status
2290
      return
2291

    
2292
    for minor, (iname, must_exist) in node_drbd.items():
2293
      test = minor not in used_minors and must_exist
2294
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2295
               "drbd minor %d of instance %s is not active", minor, iname)
2296
    for minor in used_minors:
2297
      test = minor not in node_drbd
2298
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2299
               "unallocated drbd minor %d is in use", minor)
2300

    
2301
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2302
    """Builds the node OS structures.
2303

2304
    @type ninfo: L{objects.Node}
2305
    @param ninfo: the node to check
2306
    @param nresult: the remote results for the node
2307
    @param nimg: the node image object
2308

2309
    """
2310
    node = ninfo.name
2311
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2312

    
2313
    remote_os = nresult.get(constants.NV_OSLIST, None)
2314
    test = (not isinstance(remote_os, list) or
2315
            not compat.all(isinstance(v, list) and len(v) == 7
2316
                           for v in remote_os))
2317

    
2318
    _ErrorIf(test, constants.CV_ENODEOS, node,
2319
             "node hasn't returned valid OS data")
2320

    
2321
    nimg.os_fail = test
2322

    
2323
    if test:
2324
      return
2325

    
2326
    os_dict = {}
2327

    
2328
    for (name, os_path, status, diagnose,
2329
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2330

    
2331
      if name not in os_dict:
2332
        os_dict[name] = []
2333

    
2334
      # parameters is a list of lists instead of list of tuples due to
2335
      # JSON lacking a real tuple type, fix it:
2336
      parameters = [tuple(v) for v in parameters]
2337
      os_dict[name].append((os_path, status, diagnose,
2338
                            set(variants), set(parameters), set(api_ver)))
2339

    
2340
    nimg.oslist = os_dict
2341

    
2342
  def _VerifyNodeOS(self, ninfo, nimg, base):
2343
    """Verifies the node OS list.
2344

2345
    @type ninfo: L{objects.Node}
2346
    @param ninfo: the node to check
2347
    @param nimg: the node image object
2348
    @param base: the 'template' node we match against (e.g. from the master)
2349

2350
    """
2351
    node = ninfo.name
2352
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2353

    
2354
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2355

    
2356
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2357
    for os_name, os_data in nimg.oslist.items():
2358
      assert os_data, "Empty OS status for OS %s?!" % os_name
2359
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2360
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2361
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2362
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2363
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2364
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2365
      # comparisons with the 'base' image
2366
      test = os_name not in base.oslist
2367
      _ErrorIf(test, constants.CV_ENODEOS, node,
2368
               "Extra OS %s not present on reference node (%s)",
2369
               os_name, base.name)
2370
      if test:
2371
        continue
2372
      assert base.oslist[os_name], "Base node has empty OS status?"
2373
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2374
      if not b_status:
2375
        # base OS is invalid, skipping
2376
        continue
2377
      for kind, a, b in [("API version", f_api, b_api),
2378
                         ("variants list", f_var, b_var),
2379
                         ("parameters", beautify_params(f_param),
2380
                          beautify_params(b_param))]:
2381
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2382
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2383
                 kind, os_name, base.name,
2384
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2385

    
2386
    # check any missing OSes
2387
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2388
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2389
             "OSes present on reference node %s but missing on this node: %s",
2390
             base.name, utils.CommaJoin(missing))
2391

    
2392
  def _VerifyOob(self, ninfo, nresult):
2393
    """Verifies out of band functionality of a node.
2394

2395
    @type ninfo: L{objects.Node}
2396
    @param ninfo: the node to check
2397
    @param nresult: the remote results for the node
2398

2399
    """
2400
    node = ninfo.name
2401
    # We just have to verify the paths on master and/or master candidates
2402
    # as the oob helper is invoked on the master
2403
    if ((ninfo.master_candidate or ninfo.master_capable) and
2404
        constants.NV_OOB_PATHS in nresult):
2405
      for path_result in nresult[constants.NV_OOB_PATHS]:
2406
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2407

    
2408
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2409
    """Verifies and updates the node volume data.
2410

2411
    This function will update a L{NodeImage}'s internal structures
2412
    with data from the remote call.
2413

2414
    @type ninfo: L{objects.Node}
2415
    @param ninfo: the node to check
2416
    @param nresult: the remote results for the node
2417
    @param nimg: the node image object
2418
    @param vg_name: the configured VG name
2419

2420
    """
2421
    node = ninfo.name
2422
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2423

    
2424
    nimg.lvm_fail = True
2425
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2426
    if vg_name is None:
2427
      pass
2428
    elif isinstance(lvdata, basestring):
2429
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2430
               utils.SafeEncode(lvdata))
2431
    elif not isinstance(lvdata, dict):
2432
      _ErrorIf(True, constants.CV_ENODELVM, node,
2433
               "rpc call to node failed (lvlist)")
2434
    else:
2435
      nimg.volumes = lvdata
2436
      nimg.lvm_fail = False
2437

    
2438
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2439
    """Verifies and updates the node instance list.
2440

2441
    If the listing was successful, then updates this node's instance
2442
    list. Otherwise, it marks the RPC call as failed for the instance
2443
    list key.
2444

2445
    @type ninfo: L{objects.Node}
2446
    @param ninfo: the node to check
2447
    @param nresult: the remote results for the node
2448
    @param nimg: the node image object
2449

2450
    """
2451
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2452
    test = not isinstance(idata, list)
2453
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2454
                  "rpc call to node failed (instancelist): %s",
2455
                  utils.SafeEncode(str(idata)))
2456
    if test:
2457
      nimg.hyp_fail = True
2458
    else:
2459
      nimg.instances = idata
2460

    
2461
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2462
    """Verifies and computes a node information map
2463

2464
    @type ninfo: L{objects.Node}
2465
    @param ninfo: the node to check
2466
    @param nresult: the remote results for the node
2467
    @param nimg: the node image object
2468
    @param vg_name: the configured VG name
2469

2470
    """
2471
    node = ninfo.name
2472
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2473

    
2474
    # try to read free memory (from the hypervisor)
2475
    hv_info = nresult.get(constants.NV_HVINFO, None)
2476
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2477
    _ErrorIf(test, constants.CV_ENODEHV, node,
2478
             "rpc call to node failed (hvinfo)")
2479
    if not test:
2480
      try:
2481
        nimg.mfree = int(hv_info["memory_free"])
2482
      except (ValueError, TypeError):
2483
        _ErrorIf(True, constants.CV_ENODERPC, node,
2484
                 "node returned invalid nodeinfo, check hypervisor")
2485

    
2486
    # FIXME: devise a free space model for file based instances as well
2487
    if vg_name is not None:
2488
      test = (constants.NV_VGLIST not in nresult or
2489
              vg_name not in nresult[constants.NV_VGLIST])
2490
      _ErrorIf(test, constants.CV_ENODELVM, node,
2491
               "node didn't return data for the volume group '%s'"
2492
               " - it is either missing or broken", vg_name)
2493
      if not test:
2494
        try:
2495
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2496
        except (ValueError, TypeError):
2497
          _ErrorIf(True, constants.CV_ENODERPC, node,
2498
                   "node returned invalid LVM info, check LVM status")
2499

    
2500
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2501
    """Gets per-disk status information for all instances.
2502

2503
    @type nodelist: list of strings
2504
    @param nodelist: Node names
2505
    @type node_image: dict of (name, L{objects.Node})
2506
    @param node_image: Node objects
2507
    @type instanceinfo: dict of (name, L{objects.Instance})
2508
    @param instanceinfo: Instance objects
2509
    @rtype: {instance: {node: [(success, payload)]}}
2510
    @return: a dictionary of per-instance dictionaries with nodes as
2511
        keys and disk information as values; the disk information is a
2512
        list of tuples (success, payload)
2513

2514
    """
2515
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2516

    
2517
    node_disks = {}
2518
    node_disks_devonly = {}
2519
    diskless_instances = set()
2520
    diskless = constants.DT_DISKLESS
2521

    
2522
    for nname in nodelist:
2523
      node_instances = list(itertools.chain(node_image[nname].pinst,
2524
                                            node_image[nname].sinst))
2525
      diskless_instances.update(inst for inst in node_instances
2526
                                if instanceinfo[inst].disk_template == diskless)
2527
      disks = [(inst, disk)
2528
               for inst in node_instances
2529
               for disk in instanceinfo[inst].disks]
2530

    
2531
      if not disks:
2532
        # No need to collect data
2533
        continue
2534

    
2535
      node_disks[nname] = disks
2536

    
2537
      # Creating copies as SetDiskID below will modify the objects and that can
2538
      # lead to incorrect data returned from nodes
2539
      devonly = [dev.Copy() for (_, dev) in disks]
2540

    
2541
      for dev in devonly:
2542
        self.cfg.SetDiskID(dev, nname)
2543

    
2544
      node_disks_devonly[nname] = devonly
2545

    
2546
    assert len(node_disks) == len(node_disks_devonly)
2547

    
2548
    # Collect data from all nodes with disks
2549
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2550
                                                          node_disks_devonly)
2551

    
2552
    assert len(result) == len(node_disks)
2553

    
2554
    instdisk = {}
2555

    
2556
    for (nname, nres) in result.items():
2557
      disks = node_disks[nname]
2558

    
2559
      if nres.offline:
2560
        # No data from this node
2561
        data = len(disks) * [(False, "node offline")]
2562
      else:
2563
        msg = nres.fail_msg
2564
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2565
                 "while getting disk information: %s", msg)
2566
        if msg:
2567
          # No data from this node
2568
          data = len(disks) * [(False, msg)]
2569
        else:
2570
          data = []
2571
          for idx, i in enumerate(nres.payload):
2572
            if isinstance(i, (tuple, list)) and len(i) == 2:
2573
              data.append(i)
2574
            else:
2575
              logging.warning("Invalid result from node %s, entry %d: %s",
2576
                              nname, idx, i)
2577
              data.append((False, "Invalid result from the remote node"))
2578

    
2579
      for ((inst, _), status) in zip(disks, data):
2580
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2581

    
2582
    # Add empty entries for diskless instances.
2583
    for inst in diskless_instances:
2584
      assert inst not in instdisk
2585
      instdisk[inst] = {}
2586

    
2587
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2588
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2589
                      compat.all(isinstance(s, (tuple, list)) and
2590
                                 len(s) == 2 for s in statuses)
2591
                      for inst, nnames in instdisk.items()
2592
                      for nname, statuses in nnames.items())
2593
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2594

    
2595
    return instdisk
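
  # A minimal sketch (hypothetical names) of the mapping returned by
  # _CollectDiskInfo for one DRBD instance with a single disk plus one
  # diskless instance:
  #
  #   {"inst1": {"node1": [(True, bdev_status)],
  #              "node2": [(True, bdev_status)]},
  #    "diskless1": {}}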
2596

    
2597
  @staticmethod
2598
  def _SshNodeSelector(group_uuid, all_nodes):
2599
    """Create endless iterators for all potential SSH check hosts.
2600

2601
    """
2602
    nodes = [node for node in all_nodes
2603
             if (node.group != group_uuid and
2604
                 not node.offline)]
2605
    keyfunc = operator.attrgetter("group")
2606

    
2607
    return map(itertools.cycle,
2608
               [sorted(map(operator.attrgetter("name"), names))
2609
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2610
                                                  keyfunc)])
2611

    
2612
  @classmethod
2613
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2614
    """Choose which nodes should talk to which other nodes.
2615

2616
    We will make nodes contact all nodes in their group, and one node from
2617
    every other group.
2618

2619
    @warning: This algorithm has a known issue if one node group is much
2620
      smaller than others (e.g. just one node). In such a case all other
2621
      nodes will talk to the single node.
2622

2623
    """
2624
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2625
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2626

    
2627
    return (online_nodes,
2628
            dict((name, sorted([i.next() for i in sel]))
2629
                 for name in online_nodes))
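
  # A minimal sketch (hypothetical names) of what _SelectSshCheckNodes
  # returns for a two-node group when the cluster has one other group: each
  # node in this group is asked to contact one node of every other group.
  #
  #   (["node1", "node2"],
  #    {"node1": ["other1.example.com"],
  #     "node2": ["other2.example.com"]})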
2630

    
2631
  def BuildHooksEnv(self):
2632
    """Build hooks env.
2633

2634
    Cluster-Verify hooks are run in the post phase only; their failure is
    logged in the verify output and makes the verification fail.
2636

2637
    """
2638
    env = {
2639
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2640
      }
2641

    
2642
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2643
               for node in self.my_node_info.values())
2644

    
2645
    return env
2646

    
2647
  def BuildHooksNodes(self):
2648
    """Build hooks nodes.
2649

2650
    """
2651
    return ([], self.my_node_names)
2652

    
2653
  def Exec(self, feedback_fn):
2654
    """Verify integrity of the node group, performing various test on nodes.
2655

2656
    """
2657
    # This method has too many local variables. pylint: disable=R0914
2658
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2659

    
2660
    if not self.my_node_names:
2661
      # empty node group
2662
      feedback_fn("* Empty node group, skipping verification")
2663
      return True
2664

    
2665
    self.bad = False
2666
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2667
    verbose = self.op.verbose
2668
    self._feedback_fn = feedback_fn
2669

    
2670
    vg_name = self.cfg.GetVGName()
2671
    drbd_helper = self.cfg.GetDRBDHelper()
2672
    cluster = self.cfg.GetClusterInfo()
2673
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2674
    hypervisors = cluster.enabled_hypervisors
2675
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2676

    
2677
    i_non_redundant = [] # Non redundant instances
2678
    i_non_a_balanced = [] # Non auto-balanced instances
2679
    i_offline = 0 # Count of offline instances
2680
    n_offline = 0 # Count of offline nodes
2681
    n_drained = 0 # Count of nodes being drained
2682
    node_vol_should = {}
2683

    
2684
    # FIXME: verify OS list
2685

    
2686
    # File verification
2687
    filemap = _ComputeAncillaryFiles(cluster, False)
2688

    
2689
    # do local checksums
2690
    master_node = self.master_node = self.cfg.GetMasterNode()
2691
    master_ip = self.cfg.GetMasterIP()
2692

    
2693
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2694

    
2695
    user_scripts = []
2696
    if self.cfg.GetUseExternalMipScript():
2697
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2698

    
2699
    node_verify_param = {
2700
      constants.NV_FILELIST:
2701
        utils.UniqueSequence(filename
2702
                             for files in filemap
2703
                             for filename in files),
2704
      constants.NV_NODELIST:
2705
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2706
                                  self.all_node_info.values()),
2707
      constants.NV_HYPERVISOR: hypervisors,
2708
      constants.NV_HVPARAMS:
2709
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2710
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2711
                                 for node in node_data_list
2712
                                 if not node.offline],
2713
      constants.NV_INSTANCELIST: hypervisors,
2714
      constants.NV_VERSION: None,
2715
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2716
      constants.NV_NODESETUP: None,
2717
      constants.NV_TIME: None,
2718
      constants.NV_MASTERIP: (master_node, master_ip),
2719
      constants.NV_OSLIST: None,
2720
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2721
      constants.NV_USERSCRIPTS: user_scripts,
2722
      }
2723

    
2724
    if vg_name is not None:
2725
      node_verify_param[constants.NV_VGLIST] = None
2726
      node_verify_param[constants.NV_LVLIST] = vg_name
2727
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2728
      node_verify_param[constants.NV_DRBDLIST] = None
2729

    
2730
    if drbd_helper:
2731
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2732

    
2733
    # bridge checks
2734
    # FIXME: this needs to be changed per node-group, not cluster-wide
2735
    bridges = set()
2736
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2737
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2738
      bridges.add(default_nicpp[constants.NIC_LINK])
2739
    for instance in self.my_inst_info.values():
2740
      for nic in instance.nics:
2741
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2742
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2743
          bridges.add(full_nic[constants.NIC_LINK])
2744

    
2745
    if bridges:
2746
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2747

    
2748
    # Build our expected cluster state
2749
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2750
                                                 name=node.name,
2751
                                                 vm_capable=node.vm_capable))
2752
                      for node in node_data_list)
2753

    
2754
    # Gather OOB paths
2755
    oob_paths = []
2756
    for node in self.all_node_info.values():
2757
      path = _SupportsOob(self.cfg, node)
2758
      if path and path not in oob_paths:
2759
        oob_paths.append(path)
2760

    
2761
    if oob_paths:
2762
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2763

    
2764
    for instance in self.my_inst_names:
2765
      inst_config = self.my_inst_info[instance]
2766

    
2767
      for nname in inst_config.all_nodes:
2768
        if nname not in node_image:
2769
          gnode = self.NodeImage(name=nname)
2770
          gnode.ghost = (nname not in self.all_node_info)
2771
          node_image[nname] = gnode
2772

    
2773
      inst_config.MapLVsByNode(node_vol_should)
2774

    
2775
      pnode = inst_config.primary_node
2776
      node_image[pnode].pinst.append(instance)
2777

    
2778
      for snode in inst_config.secondary_nodes:
2779
        nimg = node_image[snode]
2780
        nimg.sinst.append(instance)
2781
        if pnode not in nimg.sbp:
2782
          nimg.sbp[pnode] = []
2783
        nimg.sbp[pnode].append(instance)
2784

    
2785
    # At this point, we have the in-memory data structures complete,
2786
    # except for the runtime information, which we'll gather next
2787

    
2788
    # Due to the way our RPC system works, exact response times cannot be
2789
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2790
    # time before and after executing the request, we can at least have a time
2791
    # window.
2792
    nvinfo_starttime = time.time()
2793
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2794
                                           node_verify_param,
2795
                                           self.cfg.GetClusterName())
2796
    nvinfo_endtime = time.time()
2797

    
2798
    if self.extra_lv_nodes and vg_name is not None:
2799
      extra_lv_nvinfo = \
2800
          self.rpc.call_node_verify(self.extra_lv_nodes,
2801
                                    {constants.NV_LVLIST: vg_name},
2802
                                    self.cfg.GetClusterName())
2803
    else:
2804
      extra_lv_nvinfo = {}
2805

    
2806
    all_drbd_map = self.cfg.ComputeDRBDMap()
2807

    
2808
    feedback_fn("* Gathering disk information (%s nodes)" %
2809
                len(self.my_node_names))
2810
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2811
                                     self.my_inst_info)
2812

    
2813
    feedback_fn("* Verifying configuration file consistency")
2814

    
2815
    # If not all nodes are being checked, we need to make sure the master node
2816
    # and a non-checked vm_capable node are in the list.
2817
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2818
    if absent_nodes:
2819
      vf_nvinfo = all_nvinfo.copy()
2820
      vf_node_info = list(self.my_node_info.values())
2821
      additional_nodes = []
2822
      if master_node not in self.my_node_info:
2823
        additional_nodes.append(master_node)
2824
        vf_node_info.append(self.all_node_info[master_node])
2825
      # Add the first vm_capable node we find which is not included
2826
      for node in absent_nodes:
2827
        nodeinfo = self.all_node_info[node]
2828
        if nodeinfo.vm_capable and not nodeinfo.offline:
2829
          additional_nodes.append(node)
2830
          vf_node_info.append(self.all_node_info[node])
2831
          break
2832
      key = constants.NV_FILELIST
2833
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2834
                                                 {key: node_verify_param[key]},
2835
                                                 self.cfg.GetClusterName()))
2836
    else:
2837
      vf_nvinfo = all_nvinfo
2838
      vf_node_info = self.my_node_info.values()
2839

    
2840
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2841

    
2842
    feedback_fn("* Verifying node status")
2843

    
2844
    refos_img = None
2845

    
2846
    for node_i in node_data_list:
2847
      node = node_i.name
2848
      nimg = node_image[node]
2849

    
2850
      if node_i.offline:
2851
        if verbose:
2852
          feedback_fn("* Skipping offline node %s" % (node,))
2853
        n_offline += 1
2854
        continue
2855

    
2856
      if node == master_node:
2857
        ntype = "master"
2858
      elif node_i.master_candidate:
2859
        ntype = "master candidate"
2860
      elif node_i.drained:
2861
        ntype = "drained"
2862
        n_drained += 1
2863
      else:
2864
        ntype = "regular"
2865
      if verbose:
2866
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2867

    
2868
      msg = all_nvinfo[node].fail_msg
2869
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2870
               msg)
2871
      if msg:
2872
        nimg.rpc_fail = True
2873
        continue
2874

    
2875
      nresult = all_nvinfo[node].payload
2876

    
2877
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2878
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2879
      self._VerifyNodeNetwork(node_i, nresult)
2880
      self._VerifyNodeUserScripts(node_i, nresult)
2881
      self._VerifyOob(node_i, nresult)
2882

    
2883
      if nimg.vm_capable:
2884
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2885
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2886
                             all_drbd_map)
2887

    
2888
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2889
        self._UpdateNodeInstances(node_i, nresult, nimg)
2890
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2891
        self._UpdateNodeOS(node_i, nresult, nimg)
2892

    
2893
        if not nimg.os_fail:
2894
          if refos_img is None:
2895
            refos_img = nimg
2896
          self._VerifyNodeOS(node_i, nimg, refos_img)
2897
        self._VerifyNodeBridges(node_i, nresult, bridges)
2898

    
2899
        # Check whether all running instances are primary for the node. (This
2900
        # can no longer be done from _VerifyInstance below, since some of the
2901
        # wrong instances could be from other node groups.)
2902
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2903

    
2904
        for inst in non_primary_inst:
2905
          # FIXME: investigate best way to handle offline insts
2906
          if inst.admin_state == constants.ADMINST_OFFLINE:
2907
            if verbose:
2908
              feedback_fn("* Skipping offline instance %s" % inst.name)
2909
            i_offline += 1
2910
            continue
2911
          test = inst in self.all_inst_info
2912
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2913
                   "instance should not run on node %s", node_i.name)
2914
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2915
                   "node is running unknown instance %s", inst)
2916

    
2917
    for node, result in extra_lv_nvinfo.items():
2918
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2919
                              node_image[node], vg_name)
2920

    
2921
    feedback_fn("* Verifying instance status")
2922
    for instance in self.my_inst_names:
2923
      if verbose:
2924
        feedback_fn("* Verifying instance %s" % instance)
2925
      inst_config = self.my_inst_info[instance]
2926
      self._VerifyInstance(instance, inst_config, node_image,
2927
                           instdisk[instance])
2928
      inst_nodes_offline = []
2929

    
2930
      pnode = inst_config.primary_node
2931
      pnode_img = node_image[pnode]
2932
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2933
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2934
               " primary node failed", instance)
2935

    
2936
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2937
               pnode_img.offline,
2938
               constants.CV_EINSTANCEBADNODE, instance,
2939
               "instance is marked as running and lives on offline node %s",
2940
               inst_config.primary_node)
2941

    
2942
      # If the instance is non-redundant we cannot survive losing its primary
2943
      # node, so we are not N+1 compliant. On the other hand we have no disk
2944
      # templates with more than one secondary so that situation is not well
2945
      # supported either.
2946
      # FIXME: does not support file-backed instances
2947
      if not inst_config.secondary_nodes:
2948
        i_non_redundant.append(instance)
2949

    
2950
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2951
               constants.CV_EINSTANCELAYOUT,
2952
               instance, "instance has multiple secondary nodes: %s",
2953
               utils.CommaJoin(inst_config.secondary_nodes),
2954
               code=self.ETYPE_WARNING)
2955

    
2956
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2957
        pnode = inst_config.primary_node
2958
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2959
        instance_groups = {}
2960

    
2961
        for node in instance_nodes:
2962
          instance_groups.setdefault(self.all_node_info[node].group,
2963
                                     []).append(node)
2964

    
2965
        pretty_list = [
2966
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2967
          # Sort so that we always list the primary node first.
2968
          for group, nodes in sorted(instance_groups.items(),
2969
                                     key=lambda (_, nodes): pnode in nodes,
2970
                                     reverse=True)]
2971

    
2972
        self._ErrorIf(len(instance_groups) > 1,
2973
                      constants.CV_EINSTANCESPLITGROUPS,
2974
                      instance, "instance has primary and secondary nodes in"
2975
                      " different groups: %s", utils.CommaJoin(pretty_list),
2976
                      code=self.ETYPE_WARNING)
2977

    
2978
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2979
        i_non_a_balanced.append(instance)
2980

    
2981
      for snode in inst_config.secondary_nodes:
2982
        s_img = node_image[snode]
2983
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2984
                 snode, "instance %s, connection to secondary node failed",
2985
                 instance)
2986

    
2987
        if s_img.offline:
2988
          inst_nodes_offline.append(snode)
2989

    
2990
      # warn that the instance lives on offline nodes
2991
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2992
               "instance has offline secondary node(s) %s",
2993
               utils.CommaJoin(inst_nodes_offline))
2994
      # ... or ghost/non-vm_capable nodes
2995
      for node in inst_config.all_nodes:
2996
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2997
                 instance, "instance lives on ghost node %s", node)
2998
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2999
                 instance, "instance lives on non-vm_capable node %s", node)
3000

    
3001
    feedback_fn("* Verifying orphan volumes")
3002
    reserved = utils.FieldSet(*cluster.reserved_lvs)
3003

    
3004
    # We will get spurious "unknown volume" warnings if any node of this group
3005
    # is secondary for an instance whose primary is in another group. To avoid
3006
    # them, we find these instances and add their volumes to node_vol_should.
3007
    for inst in self.all_inst_info.values():
3008
      for secondary in inst.secondary_nodes:
3009
        if (secondary in self.my_node_info
3010
            and inst.name not in self.my_inst_info):
3011
          inst.MapLVsByNode(node_vol_should)
3012
          break
3013

    
3014
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3015

    
3016
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3017
      feedback_fn("* Verifying N+1 Memory redundancy")
3018
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3019

    
3020
    feedback_fn("* Other Notes")
3021
    if i_non_redundant:
3022
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3023
                  % len(i_non_redundant))
3024

    
3025
    if i_non_a_balanced:
3026
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3027
                  % len(i_non_a_balanced))
3028

    
3029
    if i_offline:
3030
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3031

    
3032
    if n_offline:
3033
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3034

    
3035
    if n_drained:
3036
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3037

    
3038
    return not self.bad
3039

    
3040
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3041
    """Analyze the post-hooks' result
3042

3043
    This method analyses the hook result, handles it, and sends some
3044
    nicely-formatted feedback back to the user.
3045

3046
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3047
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3048
    @param hooks_results: the results of the multi-node hooks rpc call
3049
    @param feedback_fn: function used to send feedback back to the caller
3050
    @param lu_result: previous Exec result
3051
    @return: the new Exec result, based on the previous result
3052
        and hook results
3053

3054
    """
3055
    # We only really run POST phase hooks, only for non-empty groups,
3056
    # and are only interested in their results
3057
    if not self.my_node_names:
3058
      # empty node group
3059
      pass
3060
    elif phase == constants.HOOKS_PHASE_POST:
3061
      # Used to change hooks' output to proper indentation
3062
      feedback_fn("* Hooks Results")
3063
      assert hooks_results, "invalid result from hooks"
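      # Illustrative shape of one node's hook result (hypothetical script
      # name and output), as consumed by the loop below: res.payload is a
      # list of (script, status, output) tuples, e.g.
      #   [("10-example-hook", constants.HKR_FAIL, "hook error text")]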
3064

    
3065
      for node_name in hooks_results:
3066
        res = hooks_results[node_name]
3067
        msg = res.fail_msg
3068
        test = msg and not res.offline
3069
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3070
                      "Communication failure in hooks execution: %s", msg)
3071
        if res.offline or msg:
3072
          # No need to investigate payload if node is offline or gave
3073
          # an error.
3074
          continue
3075
        for script, hkr, output in res.payload:
3076
          test = hkr == constants.HKR_FAIL
3077
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3078
                        "Script %s failed, output:", script)
3079
          if test:
3080
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3081
            feedback_fn("%s" % output)
3082
            lu_result = False
3083

    
3084
    return lu_result
3085

    
3086

    
3087
class LUClusterVerifyDisks(NoHooksLU):
3088
  """Verifies the cluster disks status.
3089

3090
  """
3091
  REQ_BGL = False
3092

    
3093
  def ExpandNames(self):
3094
    self.share_locks = _ShareAll()
3095
    self.needed_locks = {
3096
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3097
      }
3098

    
3099
  def Exec(self, feedback_fn):
3100
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3101

    
3102
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
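    # For example (hypothetical group names), two node groups would yield
    #   jobs == [[opcodes.OpGroupVerifyDisks(group_name="group1")],
    #            [opcodes.OpGroupVerifyDisks(group_name="group2")]]
    # i.e. one single-opcode job per node group.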
3103
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3104
                           for group in group_names])
3105

    
3106

    
3107
class LUGroupVerifyDisks(NoHooksLU):
3108
  """Verifies the status of all disks in a node group.
3109

3110
  """
3111
  REQ_BGL = False
3112

    
3113
  def ExpandNames(self):
3114
    # Raises errors.OpPrereqError on its own if group can't be found
3115
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3116

    
3117
    self.share_locks = _ShareAll()
3118
    self.needed_locks = {
3119
      locking.LEVEL_INSTANCE: [],
3120
      locking.LEVEL_NODEGROUP: [],
3121
      locking.LEVEL_NODE: [],
3122
      }
3123

    
3124
  def DeclareLocks(self, level):
3125
    if level == locking.LEVEL_INSTANCE:
3126
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3127

    
3128
      # Lock instances optimistically, needs verification once node and group
3129
      # locks have been acquired
3130
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3131
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3132

    
3133
    elif level == locking.LEVEL_NODEGROUP:
3134
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3135

    
3136
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3137
        set([self.group_uuid] +
3138
            # Lock all groups used by instances optimistically; this requires
3139
            # going via the node before it's locked, requiring verification
3140
            # later on
3141
            [group_uuid
3142
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3143
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3144

    
3145
    elif level == locking.LEVEL_NODE:
3146
      # This will only lock the nodes in the group to be verified which contain
3147
      # actual instances
3148
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3149
      self._LockInstancesNodes()
3150

    
3151
      # Lock all nodes in group to be verified
3152
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3153
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3154
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3155

    
3156
  def CheckPrereq(self):
3157
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3158
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3159
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3160

    
3161
    assert self.group_uuid in owned_groups
3162

    
3163
    # Check if locked instances are still correct
3164
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3165

    
3166
    # Get instance information
3167
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3168

    
3169
    # Check if node groups for locked instances are still correct
3170
    for (instance_name, inst) in self.instances.items():
3171
      assert owned_nodes.issuperset(inst.all_nodes), \
3172
        "Instance %s's nodes changed while we kept the lock" % instance_name
3173

    
3174
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3175
                                             owned_groups)
3176

    
3177
      assert self.group_uuid in inst_groups, \
3178
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3179

    
3180
  def Exec(self, feedback_fn):
3181
    """Verify integrity of cluster disks.
3182

3183
    @rtype: tuple of three items
3184
    @return: a tuple of (dict of node-to-node_error, list of instances
3185
        which need activate-disks, dict of instance: (node, volume) for
3186
        missing volumes)
3187

3188
    """
3189
    res_nodes = {}
3190
    res_instances = set()
3191
    res_missing = {}
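    # Illustrative final contents (hypothetical node/instance/LV names):
    #   res_nodes     == {"node1.example.com": "error contacting node"}
    #   res_instances == set(["instance1.example.com"])
    #   res_missing   == {"instance2.example.com":
    #                     [["node2.example.com", "xenvg/lv0"]]}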
3192

    
3193
    nv_dict = _MapInstanceDisksToNodes([inst
3194
            for inst in self.instances.values()
3195
            if inst.admin_state == constants.ADMINST_UP])
3196

    
3197
    if nv_dict:
3198
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3199
                             set(self.cfg.GetVmCapableNodeList()))
3200

    
3201
      node_lvs = self.rpc.call_lv_list(nodes, [])
3202

    
3203
      for (node, node_res) in node_lvs.items():
3204
        if node_res.offline:
3205
          continue
3206

    
3207
        msg = node_res.fail_msg
3208
        if msg:
3209
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3210
          res_nodes[node] = msg
3211
          continue
3212

    
3213
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3214
          inst = nv_dict.pop((node, lv_name), None)
3215
          if not (lv_online or inst is None):
3216
            res_instances.add(inst)
3217

    
3218
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3219
      # better
3220
      for key, inst in nv_dict.iteritems():
3221
        res_missing.setdefault(inst, []).append(list(key))
3222

    
3223
    return (res_nodes, list(res_instances), res_missing)
3224

    
3225

    
3226
class LUClusterRepairDiskSizes(NoHooksLU):
3227
  """Verifies the cluster disks sizes.
3228

3229
  """
3230
  REQ_BGL = False
3231

    
3232
  def ExpandNames(self):
3233
    if self.op.instances:
3234
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3235
      self.needed_locks = {
3236
        locking.LEVEL_NODE_RES: [],
3237
        locking.LEVEL_INSTANCE: self.wanted_names,
3238
        }
3239
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3240
    else:
3241
      self.wanted_names = None
3242
      self.needed_locks = {
3243
        locking.LEVEL_NODE_RES: locking.ALL_SET,
3244
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3245
        }
3246
    self.share_locks = _ShareAll()
3247

    
3248
  def DeclareLocks(self, level):
3249
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3250
      self._LockInstancesNodes(primary_only=True, level=level)
3251

    
3252
  def CheckPrereq(self):
3253
    """Check prerequisites.
3254

3255
    This only checks the optional instance list against the existing names.
3256

3257
    """
3258
    if self.wanted_names is None:
3259
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3260

    
3261
    self.wanted_instances = \
3262
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3263

    
3264
  def _EnsureChildSizes(self, disk):
3265
    """Ensure children of the disk have the needed disk size.
3266

3267
    This is valid mainly for DRBD8 and fixes an issue where the
3268
    children have a smaller disk size than the parent.
3269

3270
    @param disk: an L{ganeti.objects.Disk} object
3271

3272
    """
3273
    if disk.dev_type == constants.LD_DRBD8:
3274
      assert disk.children, "Empty children for DRBD8?"
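      # Worked example (hypothetical sizes): a parent disk with recorded size
      # 10240 whose data child records 10236 gets the child's recorded size
      # corrected to 10240, and the method then returns True.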
3275
      fchild = disk.children[0]
3276
      mismatch = fchild.size < disk.size
3277
      if mismatch:
3278
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3279
                     fchild.size, disk.size)
3280
        fchild.size = disk.size
3281

    
3282
      # and we recurse on this child only, not on the metadev
3283
      return self._EnsureChildSizes(fchild) or mismatch
3284
    else:
3285
      return False
3286

    
3287
  def Exec(self, feedback_fn):
3288
    """Verify the size of cluster disks.
3289

3290
    """
3291
    # TODO: check child disks too
3292
    # TODO: check differences in size between primary/secondary nodes
3293
    per_node_disks = {}
3294
    for instance in self.wanted_instances:
3295
      pnode = instance.primary_node
3296
      if pnode not in per_node_disks:
3297
        per_node_disks[pnode] = []
3298
      for idx, disk in enumerate(instance.disks):
3299
        per_node_disks[pnode].append((instance, idx, disk))
3300

    
3301
    assert not (frozenset(per_node_disks.keys()) -
3302
                self.owned_locks(locking.LEVEL_NODE_RES)), \
3303
      "Not owning correct locks"
3304
    assert not self.owned_locks(locking.LEVEL_NODE)
3305

    
3306
    changed = []
3307
    for node, dskl in per_node_disks.items():
3308
      newl = [v[2].Copy() for v in dskl]
3309
      for dsk in newl:
3310
        self.cfg.SetDiskID(dsk, node)
3311
      result = self.rpc.call_blockdev_getsize(node, newl)
3312
      if result.fail_msg:
3313
        self.LogWarning("Failure in blockdev_getsize call to node"
3314
                        " %s, ignoring", node)
3315
        continue
3316
      if len(result.payload) != len(dskl):
3317
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
3318
                        " result.payload=%s", node, len(dskl), result.payload)
3319
        self.LogWarning("Invalid result from node %s, ignoring node results",
3320
                        node)
3321
        continue
3322
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3323
        if size is None:
3324
          self.LogWarning("Disk %d of instance %s did not return size"
3325
                          " information, ignoring", idx, instance.name)
3326
          continue
3327
        if not isinstance(size, (int, long)):
3328
          self.LogWarning("Disk %d of instance %s did not return valid"
3329
                          " size information, ignoring", idx, instance.name)
3330
          continue
3331
        size = size >> 20
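        # (the shift divides by 2**20, converting the byte count reported by
        # the node to MiB; e.g. 10737418240 becomes 10240, which matches the
        # units used by disk.size)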
3332
        if size != disk.size:
3333
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3334
                       " correcting: recorded %d, actual %d", idx,
3335
                       instance.name, disk.size, size)
3336
          disk.size = size
3337
          self.cfg.Update(instance, feedback_fn)
3338
          changed.append((instance.name, idx, size))
3339
        if self._EnsureChildSizes(disk):
3340
          self.cfg.Update(instance, feedback_fn)
3341
          changed.append((instance.name, idx, disk.size))
3342
    return changed
3343

    
3344

    
3345
class LUClusterRename(LogicalUnit):
3346
  """Rename the cluster.
3347

3348
  """
3349
  HPATH = "cluster-rename"
3350
  HTYPE = constants.HTYPE_CLUSTER
3351

    
3352
  def BuildHooksEnv(self):
3353
    """Build hooks env.
3354

3355
    """
3356
    return {
3357
      "OP_TARGET": self.cfg.GetClusterName(),
3358
      "NEW_NAME": self.op.name,
3359
      }
3360

    
3361
  def BuildHooksNodes(self):
3362
    """Build hooks nodes.
3363

3364
    """
3365
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3366

    
3367
  def CheckPrereq(self):
3368
    """Verify that the passed name is a valid one.
3369

3370
    """
3371
    hostname = netutils.GetHostname(name=self.op.name,
3372
                                    family=self.cfg.GetPrimaryIPFamily())
3373

    
3374
    new_name = hostname.name
3375
    self.ip = new_ip = hostname.ip
3376
    old_name = self.cfg.GetClusterName()
3377
    old_ip = self.cfg.GetMasterIP()
3378
    if new_name == old_name and new_ip == old_ip:
3379
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3380
                                 " cluster has changed",
3381
                                 errors.ECODE_INVAL)
3382
    if new_ip != old_ip:
3383
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3384
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3385
                                   " reachable on the network" %
3386
                                   new_ip, errors.ECODE_NOTUNIQUE)
3387

    
3388
    self.op.name = new_name
3389

    
3390
  def Exec(self, feedback_fn):
3391
    """Rename the cluster.
3392

3393
    """
3394
    clustername = self.op.name
3395
    new_ip = self.ip
3396

    
3397
    # shutdown the master IP
3398
    master_params = self.cfg.GetMasterNetworkParameters()
3399
    ems = self.cfg.GetUseExternalMipScript()
3400
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3401
                                                     master_params, ems)
3402
    result.Raise("Could not disable the master role")
3403

    
3404
    try:
3405
      cluster = self.cfg.GetClusterInfo()
3406
      cluster.cluster_name = clustername
3407
      cluster.master_ip = new_ip
3408
      self.cfg.Update(cluster, feedback_fn)
3409

    
3410
      # update the known hosts file
3411
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3412
      node_list = self.cfg.GetOnlineNodeList()
3413
      try:
3414
        node_list.remove(master_params.name)
3415
      except ValueError:
3416
        pass
3417
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3418
    finally:
3419
      master_params.ip = new_ip
3420
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3421
                                                     master_params, ems)
3422
      msg = result.fail_msg
3423
      if msg:
3424
        self.LogWarning("Could not re-enable the master role on"
3425
                        " the master, please restart manually: %s", msg)
3426

    
3427
    return clustername
3428

    
3429

    
3430
def _ValidateNetmask(cfg, netmask):
3431
  """Checks if a netmask is valid.
3432

3433
  @type cfg: L{config.ConfigWriter}
3434
  @param cfg: The cluster configuration
3435
  @type netmask: int
3436
  @param netmask: the netmask to be verified
3437
  @raise errors.OpPrereqError: if the validation fails
3438

3439
  """
3440
  ip_family = cfg.GetPrimaryIPFamily()
3441
  try:
3442
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3443
  except errors.ProgrammerError:
3444
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
3445
                               ip_family)
3446
  if not ipcls.ValidateNetmask(netmask):
3447
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3448
                                (netmask))
3449

    
3450

    
3451
class LUClusterSetParams(LogicalUnit):
3452
  """Change the parameters of the cluster.
3453

3454
  """
3455
  HPATH = "cluster-modify"
3456
  HTYPE = constants.HTYPE_CLUSTER
3457
  REQ_BGL = False
3458

    
3459
  def CheckArguments(self):
3460
    """Check parameters
3461

3462
    """
3463
    if self.op.uid_pool:
3464
      uidpool.CheckUidPool(self.op.uid_pool)
3465

    
3466
    if self.op.add_uids:
3467
      uidpool.CheckUidPool(self.op.add_uids)
3468

    
3469
    if self.op.remove_uids:
3470
      uidpool.CheckUidPool(self.op.remove_uids)
3471

    
3472
    if self.op.master_netmask is not None:
3473
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3474

    
3475
  def ExpandNames(self):
3476
    # FIXME: in the future maybe other cluster params won't require checking on
3477
    # all nodes to be modified.
3478
    self.needed_locks = {
3479
      locking.LEVEL_NODE: locking.ALL_SET,
3480
    }
3481
    self.share_locks[locking.LEVEL_NODE] = 1
3482

    
3483
  def BuildHooksEnv(self):
3484
    """Build hooks env.
3485

3486
    """
3487
    return {
3488
      "OP_TARGET": self.cfg.GetClusterName(),
3489
      "NEW_VG_NAME": self.op.vg_name,
3490
      }
3491

    
3492
  def BuildHooksNodes(self):
3493
    """Build hooks nodes.
3494

3495
    """
3496
    mn = self.cfg.GetMasterNode()
3497
    return ([mn], [mn])
3498

    
3499
  def CheckPrereq(self):
3500
    """Check prerequisites.
3501

3502
    This checks whether the given params don't conflict and
3503
    if the given volume group is valid.
3504

3505
    """
3506
    if self.op.vg_name is not None and not self.op.vg_name:
3507
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3508
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3509
                                   " instances exist", errors.ECODE_INVAL)
3510

    
3511
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3512
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3513
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3514
                                   " drbd-based instances exist",
3515
                                   errors.ECODE_INVAL)
3516

    
3517
    node_list = self.owned_locks(locking.LEVEL_NODE)
3518

    
3519
    # if vg_name is not None, check the given volume group on all nodes
3520
    if self.op.vg_name:
3521
      vglist = self.rpc.call_vg_list(node_list)
3522
      for node in node_list:
3523
        msg = vglist[node].fail_msg
3524
        if msg:
3525
          # ignoring down node
3526
          self.LogWarning("Error while gathering data on node %s"
3527
                          " (ignoring node): %s", node, msg)
3528
          continue
3529
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3530
                                              self.op.vg_name,
3531
                                              constants.MIN_VG_SIZE)
3532
        if vgstatus:
3533
          raise errors.OpPrereqError("Error on node '%s': %s" %
3534
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3535

    
3536
    if self.op.drbd_helper:
3537
      # checks given drbd helper on all nodes
3538
      helpers = self.rpc.call_drbd_helper(node_list)
3539
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3540
        if ninfo.offline:
3541
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3542
          continue
3543
        msg = helpers[node].fail_msg
3544
        if msg:
3545
          raise errors.OpPrereqError("Error checking drbd helper on node"
3546
                                     " '%s': %s" % (node, msg),
3547
                                     errors.ECODE_ENVIRON)
3548
        node_helper = helpers[node].payload
3549
        if node_helper != self.op.drbd_helper:
3550
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3551
                                     (node, node_helper), errors.ECODE_ENVIRON)
3552

    
3553
    self.cluster = cluster = self.cfg.GetClusterInfo()
3554
    # validate params changes
3555
    if self.op.beparams:
3556
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3557
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3558

    
3559
    if self.op.ndparams:
3560
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3561
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3562

    
3563
      # TODO: we need a more general way to handle resetting
3564
      # cluster-level parameters to default values
3565
      if self.new_ndparams["oob_program"] == "":
3566
        self.new_ndparams["oob_program"] = \
3567
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3568

    
3569
    if self.op.nicparams:
3570
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3571
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3572
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3573
      nic_errors = []
3574

    
3575
      # check all instances for consistency
3576
      for instance in self.cfg.GetAllInstancesInfo().values():
3577
        for nic_idx, nic in enumerate(instance.nics):
3578
          params_copy = copy.deepcopy(nic.nicparams)
3579
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3580

    
3581
          # check parameter syntax
3582
          try:
3583
            objects.NIC.CheckParameterSyntax(params_filled)
3584
          except errors.ConfigurationError, err:
3585
            nic_errors.append("Instance %s, nic/%d: %s" %
3586
                              (instance.name, nic_idx, err))
3587

    
3588
          # if we're moving instances to routed, check that they have an ip
3589
          target_mode = params_filled[constants.NIC_MODE]
3590
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3591
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3592
                              " address" % (instance.name, nic_idx))
3593
      if nic_errors:
3594
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3595
                                   "\n".join(nic_errors))
3596

    
3597
    # hypervisor list/parameters
3598
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3599
    if self.op.hvparams:
3600
      for hv_name, hv_dict in self.op.hvparams.items():
3601
        if hv_name not in self.new_hvparams:
3602
          self.new_hvparams[hv_name] = hv_dict
3603
        else:
3604
          self.new_hvparams[hv_name].update(hv_dict)
3605

    
3606
    # os hypervisor parameters
3607
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3608
    if self.op.os_hvp:
3609
      for os_name, hvs in self.op.os_hvp.items():
3610
        if os_name not in self.new_os_hvp:
3611
          self.new_os_hvp[os_name] = hvs
3612
        else:
3613
          for hv_name, hv_dict in hvs.items():
3614
            if hv_name not in self.new_os_hvp[os_name]:
3615
              self.new_os_hvp[os_name][hv_name] = hv_dict
3616
            else:
3617
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3618

    
3619
    # os parameters
3620
    self.new_osp = objects.FillDict(cluster.osparams, {})
3621
    if self.op.osparams:
3622
      for os_name, osp in self.op.osparams.items():
3623
        if os_name not in self.new_osp:
3624
          self.new_osp[os_name] = {}
3625

    
3626
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3627
                                                  use_none=True)
3628

    
3629
        if not self.new_osp[os_name]:
3630
          # we removed all parameters
3631
          del self.new_osp[os_name]
3632
        else:
3633
          # check the parameter validity (remote check)
3634
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3635
                         os_name, self.new_osp[os_name])
3636

    
3637
    # changes to the hypervisor list
3638
    if self.op.enabled_hypervisors is not None:
3639
      self.hv_list = self.op.enabled_hypervisors
3640
      for hv in self.hv_list:
3641
        # if the hypervisor doesn't already exist in the cluster
3642
        # hvparams, we initialize it to empty, and then (in both
3643
        # cases) we make sure to fill the defaults, as we might not
3644
        # have a complete defaults list if the hypervisor wasn't
3645
        # enabled before
3646
        if hv not in new_hvp:
3647
          new_hvp[hv] = {}
3648
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3649
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3650
    else:
3651
      self.hv_list = cluster.enabled_hypervisors
3652

    
3653
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3654
      # either the enabled list has changed, or the parameters have, validate
3655
      for hv_name, hv_params in self.new_hvparams.items():
3656
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3657
            (self.op.enabled_hypervisors and
3658
             hv_name in self.op.enabled_hypervisors)):
3659
          # either this is a new hypervisor, or its parameters have changed
3660
          hv_class = hypervisor.GetHypervisor(hv_name)
3661
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3662
          hv_class.CheckParameterSyntax(hv_params)
3663
          _CheckHVParams(self, node_list, hv_name, hv_params)
3664

    
3665
    if self.op.os_hvp:
3666
      # no need to check any newly-enabled hypervisors, since the
3667
      # defaults have already been checked in the above code-block
3668
      for os_name, os_hvp in self.new_os_hvp.items():
3669
        for hv_name, hv_params in os_hvp.items():
3670
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3671
          # we need to fill in the new os_hvp on top of the actual hv_p
3672
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3673
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3674
          hv_class = hypervisor.GetHypervisor(hv_name)
3675
          hv_class.CheckParameterSyntax(new_osp)
3676
          _CheckHVParams(self, node_list, hv_name, new_osp)
3677

    
3678
    if self.op.default_iallocator:
3679
      alloc_script = utils.FindFile(self.op.default_iallocator,
3680
                                    constants.IALLOCATOR_SEARCH_PATH,
3681
                                    os.path.isfile)
3682
      if alloc_script is None:
3683
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3684
                                   " specified" % self.op.default_iallocator,
3685
                                   errors.ECODE_INVAL)
3686

    
3687
  def Exec(self, feedback_fn):
3688
    """Change the parameters of the cluster.
3689

3690
    """
3691
    if self.op.vg_name is not None:
3692
      new_volume = self.op.vg_name
3693
      if not new_volume:
3694
        new_volume = None
3695
      if new_volume != self.cfg.GetVGName():
3696
        self.cfg.SetVGName(new_volume)
3697
      else:
3698
        feedback_fn("Cluster LVM configuration already in desired"
3699
                    " state, not changing")
3700
    if self.op.drbd_helper is not None:
3701
      new_helper = self.op.drbd_helper
3702
      if not new_helper:
3703
        new_helper = None
3704
      if new_helper != self.cfg.GetDRBDHelper():
3705
        self.cfg.SetDRBDHelper(new_helper)
3706
      else:
3707
        feedback_fn("Cluster DRBD helper already in desired state,"
3708
                    " not changing")
3709
    if self.op.hvparams:
3710
      self.cluster.hvparams = self.new_hvparams
3711
    if self.op.os_hvp:
3712
      self.cluster.os_hvp = self.new_os_hvp
3713
    if self.op.enabled_hypervisors is not None:
3714
      self.cluster.hvparams = self.new_hvparams
3715
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3716
    if self.op.beparams:
3717
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3718
    if self.op.nicparams:
3719
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3720
    if self.op.osparams:
3721
      self.cluster.osparams = self.new_osp
3722
    if self.op.ndparams:
3723
      self.cluster.ndparams = self.new_ndparams
3724

    
3725
    if self.op.candidate_pool_size is not None:
3726
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3727
      # we need to update the pool size here, otherwise the save will fail
3728
      _AdjustCandidatePool(self, [])
3729

    
3730
    if self.op.maintain_node_health is not None:
3731
      self.cluster.maintain_node_health = self.op.maintain_node_health
3732

    
3733
    if self.op.prealloc_wipe_disks is not None:
3734
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3735

    
3736
    if self.op.add_uids is not None:
3737
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3738

    
3739
    if self.op.remove_uids is not None:
3740
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3741

    
3742
    if self.op.uid_pool is not None:
3743
      self.cluster.uid_pool = self.op.uid_pool
3744

    
3745
    if self.op.default_iallocator is not None:
3746
      self.cluster.default_iallocator = self.op.default_iallocator
3747

    
3748
    if self.op.reserved_lvs is not None:
3749
      self.cluster.reserved_lvs = self.op.reserved_lvs
3750

    
3751
    if self.op.use_external_mip_script is not None:
3752
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3753

    
3754
    def helper_os(aname, mods, desc):
3755
      desc += " OS list"
3756
      lst = getattr(self.cluster, aname)
3757
      for key, val in mods:
3758
        if key == constants.DDM_ADD:
3759
          if val in lst:
3760
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3761
          else:
3762
            lst.append(val)
3763
        elif key == constants.DDM_REMOVE:
3764
          if val in lst:
3765
            lst.remove(val)
3766
          else:
3767
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3768
        else:
3769
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
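    # Example (hypothetical OS names): a mods value of
    #   [(constants.DDM_ADD, "new-os"), (constants.DDM_REMOVE, "old-os")]
    # appends "new-os" if absent, removes "old-os" if present, and reports
    # via feedback_fn otherwise.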
3770

    
3771
    if self.op.hidden_os:
3772
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3773

    
3774
    if self.op.blacklisted_os:
3775
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3776

    
3777
    if self.op.master_netdev:
3778
      master_params = self.cfg.GetMasterNetworkParameters()
3779
      ems = self.cfg.GetUseExternalMipScript()
3780
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3781
                  self.cluster.master_netdev)
3782
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3783
                                                       master_params, ems)
3784
      result.Raise("Could not disable the master ip")
3785
      feedback_fn("Changing master_netdev from %s to %s" %
3786
                  (master_params.netdev, self.op.master_netdev))
3787
      self.cluster.master_netdev = self.op.master_netdev
3788

    
3789
    if self.op.master_netmask:
3790
      master_params = self.cfg.GetMasterNetworkParameters()
3791
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3792
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3793
                                                        master_params.netmask,
3794
                                                        self.op.master_netmask,
3795
                                                        master_params.ip,
3796
                                                        master_params.netdev)
3797
      if result.fail_msg:
3798
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3799
        feedback_fn(msg)
3800

    
3801
      self.cluster.master_netmask = self.op.master_netmask
3802

    
3803
    self.cfg.Update(self.cluster, feedback_fn)
3804

    
3805
    if self.op.master_netdev:
3806
      master_params = self.cfg.GetMasterNetworkParameters()
3807
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3808
                  self.op.master_netdev)
3809
      ems = self.cfg.GetUseExternalMipScript()
3810
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3811
                                                     master_params, ems)
3812
      if result.fail_msg:
3813
        self.LogWarning("Could not re-enable the master ip on"
3814
                        " the master, please restart manually: %s",
3815
                        result.fail_msg)
3816

    
3817

    
3818
def _UploadHelper(lu, nodes, fname):
3819
  """Helper for uploading a file and showing warnings.
3820

3821
  """
3822
  if os.path.exists(fname):
3823
    result = lu.rpc.call_upload_file(nodes, fname)
3824
    for to_node, to_result in result.items():
3825
      msg = to_result.fail_msg
3826
      if msg:
3827
        msg = ("Copy of file %s to node %s failed: %s" %
3828
               (fname, to_node, msg))
3829
        lu.proc.LogWarning(msg)
3830

    
3831

    
3832
def _ComputeAncillaryFiles(cluster, redist):
3833
  """Compute files external to Ganeti which need to be consistent.
3834

3835
  @type redist: boolean
3836
  @param redist: Whether to include files which need to be redistributed
3837

3838
  """
3839
  # Compute files for all nodes
3840
  files_all = set([
3841
    constants.SSH_KNOWN_HOSTS_FILE,
3842
    constants.CONFD_HMAC_KEY,
3843
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3844
    constants.SPICE_CERT_FILE,
3845
    constants.SPICE_CACERT_FILE,
3846
    constants.RAPI_USERS_FILE,
3847
    ])
3848

    
3849
  if not redist:
3850
    files_all.update(constants.ALL_CERT_FILES)
3851
    files_all.update(ssconf.SimpleStore().GetFileList())
3852
  else:
3853
    # we need to ship at least the RAPI certificate
3854
    files_all.add(constants.RAPI_CERT_FILE)
3855

    
3856
  if cluster.modify_etc_hosts:
3857
    files_all.add(constants.ETC_HOSTS)
3858

    
3859
  # Files which are optional; these must:
3860
  # - be present in one other category as well
3861
  # - either exist or not exist on all nodes of that category (mc, vm all)
3862
  files_opt = set([
3863
    constants.RAPI_USERS_FILE,
3864
    ])
3865

    
3866
  # Files which should only be on master candidates
3867
  files_mc = set()
3868

    
3869
  if not redist:
3870
    files_mc.add(constants.CLUSTER_CONF_FILE)
3871

    
3872
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3873
    # replication
3874
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3875

    
3876
  # Files which should only be on VM-capable nodes
3877
  files_vm = set(filename
3878
    for hv_name in cluster.enabled_hypervisors
3879
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3880

    
3881
  files_opt |= set(filename
3882
    for hv_name in cluster.enabled_hypervisors
3883
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
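  # As a concrete illustration of the categories built above:
  # constants.RAPI_USERS_FILE appears in both files_all and files_opt, while
  # constants.CLUSTER_CONF_FILE is added to files_mc only when redist is
  # False.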
3884

    
3885
  # Filenames in each category must be unique
3886
  all_files_set = files_all | files_mc | files_vm
3887
  assert (len(all_files_set) ==
3888
          sum(map(len, [files_all, files_mc, files_vm]))), \
3889
         "Found file listed in more than one file list"
3890

    
3891
  # Optional files must be present in one other category
3892
  assert all_files_set.issuperset(files_opt), \
3893
         "Optional file not in a different required list"
3894

    
3895
  return (files_all, files_opt, files_mc, files_vm)
3896

    
3897

    
3898
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3899
  """Distribute additional files which are part of the cluster configuration.
3900

3901
  ConfigWriter takes care of distributing the config and ssconf files, but
3902
  there are more files which should be distributed to all nodes. This function
3903
  makes sure those are copied.
3904

3905
  @param lu: calling logical unit
3906
  @param additional_nodes: list of nodes not in the config to distribute to
3907
  @type additional_vm: boolean
3908
  @param additional_vm: whether the additional nodes are vm-capable or not
3909

3910
  """
3911
  # Gather target nodes
3912
  cluster = lu.cfg.GetClusterInfo()
3913
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3914

    
3915
  online_nodes = lu.cfg.GetOnlineNodeList()
3916
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3917

    
3918
  if additional_nodes is not None:
3919
    online_nodes.extend(additional_nodes)
3920
    if additional_vm:
3921
      vm_nodes.extend(additional_nodes)
3922

    
3923
  # Never distribute to master node
3924
  for nodelist in [online_nodes, vm_nodes]:
3925
    if master_info.name in nodelist:
3926
      nodelist.remove(master_info.name)
3927

    
3928
  # Gather file lists
3929
  (files_all, _, files_mc, files_vm) = \
3930
    _ComputeAncillaryFiles(cluster, True)
3931

    
3932
  # Never re-distribute configuration file from here
3933
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3934
              constants.CLUSTER_CONF_FILE in files_vm)
3935
  assert not files_mc, "Master candidates not handled in this function"
3936

    
3937
  filemap = [
3938
    (online_nodes, files_all),
3939
    (vm_nodes, files_vm),
3940
    ]
3941

    
3942
  # Upload the files
3943
  for (node_list, files) in filemap:
3944
    for fname in files:
3945
      _UploadHelper(lu, node_list, fname)
3946

    
3947

    
3948
class LUClusterRedistConf(NoHooksLU):
3949
  """Force the redistribution of cluster configuration.
3950

3951
  This is a very simple LU.
3952

3953
  """
3954
  REQ_BGL = False
3955

    
3956
  def ExpandNames(self):
3957
    self.needed_locks = {
3958
      locking.LEVEL_NODE: locking.ALL_SET,
3959
    }
3960
    self.share_locks[locking.LEVEL_NODE] = 1
3961

    
3962
  def Exec(self, feedback_fn):
3963
    """Redistribute the configuration.
3964

3965
    """
3966
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3967
    _RedistributeAncillaryFiles(self)
3968

    
3969

    
3970
class LUClusterActivateMasterIp(NoHooksLU):
3971
  """Activate the master IP on the master node.
3972

3973
  """
3974
  def Exec(self, feedback_fn):
3975
    """Activate the master IP.
3976

3977
    """
3978
    master_params = self.cfg.GetMasterNetworkParameters()
3979
    ems = self.cfg.GetUseExternalMipScript()
3980
    self.rpc.call_node_activate_master_ip(master_params.name,
3981
                                          master_params, ems)
3982

    
3983

    
3984
class LUClusterDeactivateMasterIp(NoHooksLU):
3985
  """Deactivate the master IP on the master node.
3986

3987
  """
3988
  def Exec(self, feedback_fn):
3989
    """Deactivate the master IP.
3990

3991
    """
3992
    master_params = self.cfg.GetMasterNetworkParameters()
3993
    ems = self.cfg.GetUseExternalMipScript()
3994
    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
3995
                                            ems)
3996

    
3997

    
3998
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3999
  """Sleep and poll for an instance's disk to sync.
4000

4001
  """
4002
  if not instance.disks or (disks is not None and not disks):
4003
    return True
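  # Rough flow (illustrative): poll call_blockdev_getmirrorstatus on the
  # primary node, report per-device sync progress, sleep between polls for
  # up to the longest estimated time (capped at 60 seconds), and return
  # True only once no disk remains degraded.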
4004

    
4005
  disks = _ExpandCheckDisks(instance, disks)
4006

    
4007
  if not oneshot:
4008
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4009

    
4010
  node = instance.primary_node
4011

    
4012
  for dev in disks:
4013
    lu.cfg.SetDiskID(dev, node)
4014

    
4015
  # TODO: Convert to utils.Retry
4016

    
4017
  retries = 0
4018
  degr_retries = 10 # in seconds, as we sleep 1 second each time
4019
  while True:
4020
    max_time = 0
4021
    done = True
4022
    cumul_degraded = False
4023
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4024
    msg = rstats.fail_msg
4025
    if msg:
4026
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4027
      retries += 1
4028
      if retries >= 10:
4029
        raise errors.RemoteError("Can't contact node %s for mirror data,"
4030
                                 " aborting." % node)
4031
      time.sleep(6)
4032
      continue
4033
    rstats = rstats.payload
4034
    retries = 0
4035
    for i, mstat in enumerate(rstats):
4036
      if mstat is None:
4037
        lu.LogWarning("Can't compute data for node %s/%s",
4038
                           node, disks[i].iv_name)
4039
        continue
4040

    
4041
      cumul_degraded = (cumul_degraded or
4042
                        (mstat.is_degraded and mstat.sync_percent is None))
4043
      if mstat.sync_percent is not None:
4044
        done = False
4045
        if mstat.estimated_time is not None:
4046
          rem_time = ("%s remaining (estimated)" %
4047
                      utils.FormatSeconds(mstat.estimated_time))
4048
          max_time = mstat.estimated_time
4049
        else:
4050
          rem_time = "no time estimate"
4051
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4052
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4053

    
4054
    # if we're done but degraded, let's do a few small retries, to
4055
    # make sure we see a stable and not transient situation; therefore
4056
    # we force restart of the loop
4057
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4058
      logging.info("Degraded disks found, %d retries left", degr_retries)
4059
      degr_retries -= 1
4060
      time.sleep(1)
4061
      continue
4062

    
4063
    if done or oneshot:
4064
      break
4065

    
4066
    time.sleep(min(60, max_time))
4067

    
4068
  if done:
4069
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4070
  return not cumul_degraded
4071

    
4072

    
4073
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4074
  """Check that mirrors are not degraded.
4075

4076
  The ldisk parameter, if True, will change the test from the
4077
  is_degraded attribute (which represents overall non-ok status for
4078
  the device(s)) to the ldisk (representing the local storage status).
4079

4080
  """
4081
  lu.cfg.SetDiskID(dev, node)
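  # Illustrative distinction (hedged, based on the docstring above): with
  # ldisk=True only the local storage state matters (constants.LDS_OKAY),
  # so a device whose peer is unreachable but whose local disk is healthy
  # can still pass, whereas ldisk=False uses the overall is_degraded status.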
4082

    
4083
  result = True
4084

    
4085
  if on_primary or dev.AssembleOnSecondary():
4086
    rstats = lu.rpc.call_blockdev_find(node, dev)
4087
    msg = rstats.fail_msg
4088
    if msg:
4089
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4090
      result = False
4091
    elif not rstats.payload:
4092
      lu.LogWarning("Can't find disk on node %s", node)
4093
      result = False
4094
    else:
4095
      if ldisk:
4096
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4097
      else:
4098
        result = result and not rstats.payload.is_degraded
4099

    
4100
  if dev.children:
4101
    for child in dev.children:
4102
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4103

    
4104
  return result
4105

    
4106

    
4107
class LUOobCommand(NoHooksLU):
4108
  """Logical unit for OOB handling.
4109

4110
  """
4111
  REQ_BGL = False
4112
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4113

    
4114
  def ExpandNames(self):
4115
    """Gather locks we need.
4116

4117
    """
4118
    if self.op.node_names:
4119
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4120
      lock_names = self.op.node_names
4121
    else:
4122
      lock_names = locking.ALL_SET
4123

    
4124
    self.needed_locks = {
4125
      locking.LEVEL_NODE: lock_names,
4126
      }
4127

    
4128
  def CheckPrereq(self):
4129
    """Check prerequisites.
4130

4131
    This checks:
4132
     - the node exists in the configuration
4133
     - OOB is supported
4134

4135
    Any errors are signaled by raising errors.OpPrereqError.
4136

4137
    """
4138
    self.nodes = []
4139
    self.master_node = self.cfg.GetMasterNode()
4140

    
4141
    assert self.op.power_delay >= 0.0
4142

    
4143
    if self.op.node_names:
4144
      if (self.op.command in self._SKIP_MASTER and
4145
          self.master_node in self.op.node_names):
4146
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4147
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4148

    
4149
        if master_oob_handler:
4150
          additional_text = ("run '%s %s %s' if you want to operate on the"
4151
                             " master regardless") % (master_oob_handler,
4152
                                                      self.op.command,
4153
                                                      self.master_node)
4154
        else:
4155
          additional_text = "it does not support out-of-band operations"
4156

    
4157
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4158
                                    " allowed for %s; %s") %
4159
                                   (self.master_node, self.op.command,
4160
                                    additional_text), errors.ECODE_INVAL)
4161
    else:
4162
      self.op.node_names = self.cfg.GetNodeList()
4163
      if self.op.command in self._SKIP_MASTER:
4164
        self.op.node_names.remove(self.master_node)
4165

    
4166
    if self.op.command in self._SKIP_MASTER:
4167
      assert self.master_node not in self.op.node_names
4168

    
4169
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4170
      if node is None:
4171
        raise errors.OpPrereqError("Node %s not found" % node_name,
4172
                                   errors.ECODE_NOENT)
4173
      else:
4174
        self.nodes.append(node)
4175

    
4176
      if (not self.op.ignore_status and
4177
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4178
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4179
                                    " not marked offline") % node_name,
4180
                                   errors.ECODE_STATE)
4181

    
4182
  def Exec(self, feedback_fn):
4183
    """Execute OOB and return result if we expect any.
4184

4185
    """
4186
    master_node = self.master_node
4187
    ret = []
4188

    
4189
    for idx, node in enumerate(utils.NiceSort(self.nodes,
4190
                                              key=lambda node: node.name)):
4191
      node_entry = [(constants.RS_NORMAL, node.name)]
4192
      ret.append(node_entry)
4193

    
4194
      oob_program = _SupportsOob(self.cfg, node)
4195

    
4196
      if not oob_program:
4197
        node_entry.append((constants.RS_UNAVAIL, None))
4198
        continue
4199

    
4200
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
4201
                   self.op.command, oob_program, node.name)
4202
      result = self.rpc.call_run_oob(master_node, oob_program,
4203
                                     self.op.command, node.name,
4204
                                     self.op.timeout)
4205

    
4206
      if result.fail_msg:
4207
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
4208
                        node.name, result.fail_msg)
4209
        node_entry.append((constants.RS_NODATA, None))
4210
      else:
4211
        try:
4212
          self._CheckPayload(result)
4213
        except errors.OpExecError, err:
4214
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
4215
                          node.name, err)
4216
          node_entry.append((constants.RS_NODATA, None))
4217
        else:
4218
          if self.op.command == constants.OOB_HEALTH:
4219
            # For health we should log important events
4220
            for item, status in result.payload:
4221
              if status in [constants.OOB_STATUS_WARNING,
4222
                            constants.OOB_STATUS_CRITICAL]:
4223
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
4224
                                item, node.name, status)
4225

    
4226
          if self.op.command == constants.OOB_POWER_ON:
4227
            node.powered = True
4228
          elif self.op.command == constants.OOB_POWER_OFF:
4229
            node.powered = False
4230
          elif self.op.command == constants.OOB_POWER_STATUS:
4231
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
4232
            if powered != node.powered:
4233
              logging.warning(("Recorded power state (%s) of node '%s' does not"
4234
                               " match actual power state (%s)"), node.powered,
4235
                              node.name, powered)
4236

    
4237
          # For configuration changing commands we should update the node
4238
          if self.op.command in (constants.OOB_POWER_ON,
4239
                                 constants.OOB_POWER_OFF):
4240
            self.cfg.Update(node, feedback_fn)
4241

    
4242
          node_entry.append((constants.RS_NORMAL, result.payload))
4243

    
4244
          if (self.op.command == constants.OOB_POWER_ON and
4245
              idx < len(self.nodes) - 1):
4246
            time.sleep(self.op.power_delay)
4247

    
4248
    return ret
4249

    
4250
  def _CheckPayload(self, result):
4251
    """Checks if the payload is valid.
4252

4253
    @param result: RPC result
4254
    @raises errors.OpExecError: If payload is not valid
4255

4256
    """
4257
    errs = []
4258
    if self.op.command == constants.OOB_HEALTH:
4259
      if not isinstance(result.payload, list):
4260
        errs.append("command 'health' is expected to return a list but got %s" %
4261
                    type(result.payload))
4262
      else:
4263
        for item, status in result.payload:
4264
          if status not in constants.OOB_STATUSES:
4265
            errs.append("health item '%s' has invalid status '%s'" %
4266
                        (item, status))
4267

    
4268
    if self.op.command == constants.OOB_POWER_STATUS:
4269
      if not isinstance(result.payload, dict):
4270
        errs.append("power-status is expected to return a dict but got %s" %
4271
                    type(result.payload))
4272

    
4273
    if self.op.command in [
4274
        constants.OOB_POWER_ON,
4275
        constants.OOB_POWER_OFF,
4276
        constants.OOB_POWER_CYCLE,
4277
        ]:
4278
      if result.payload is not None:
4279
        errs.append("%s is expected to not return payload but got '%s'" %
4280
                    (self.op.command, result.payload))
4281

    
4282
    if errs:
4283
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4284
                               utils.CommaJoin(errs))
4285

    
4286

    
4287
class _OsQuery(_QueryBase):
4288
  FIELDS = query.OS_FIELDS
4289

    
4290
  def ExpandNames(self, lu):
4291
    # Lock all nodes in shared mode
4292
    # Temporary removal of locks, should be reverted later
4293
    # TODO: reintroduce locks when they are lighter-weight
4294
    lu.needed_locks = {}
4295
    #self.share_locks[locking.LEVEL_NODE] = 1
4296
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4297

    
4298
    # The following variables interact with _QueryBase._GetNames
4299
    if self.names:
4300
      self.wanted = self.names
4301
    else:
4302
      self.wanted = locking.ALL_SET
4303

    
4304
    self.do_locking = self.use_locking
4305

    
4306
  def DeclareLocks(self, lu, level):
4307
    pass
4308

    
4309
  @staticmethod
4310
  def _DiagnoseByOS(rlist):
4311
    """Remaps a per-node return list into an a per-os per-node dictionary
4312

4313
    @param rlist: a map with node names as keys and OS objects as values
4314

4315
    @rtype: dict
4316
    @return: a dictionary with osnames as keys and as value another
4317
        map, with nodes as keys and tuples of (path, status, diagnose,
4318
        variants, parameters, api_versions) as values, eg::
4319

4320
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4321
                                     (/srv/..., False, "invalid api")],
4322
                           "node2": [(/srv/..., True, "", [], [])]}
4323
          }
4324

4325
    """
4326
    all_os = {}
4327
    # we build here the list of nodes that didn't fail the RPC (at RPC
4328
    # level), so that nodes with a non-responding node daemon don't
4329
    # make all OSes invalid
4330
    good_nodes = [node_name for node_name in rlist
4331
                  if not rlist[node_name].fail_msg]
4332
    for node_name, nr in rlist.items():
4333
      if nr.fail_msg or not nr.payload:
4334
        continue
4335
      for (name, path, status, diagnose, variants,
4336
           params, api_versions) in nr.payload:
4337
        if name not in all_os:
4338
          # build a list of nodes for this os containing empty lists
4339
          # for each node in node_list
4340
          all_os[name] = {}
4341
          for nname in good_nodes:
4342
            all_os[name][nname] = []
4343
        # convert params from [name, help] to (name, help)
4344
        params = [tuple(v) for v in params]
4345
        all_os[name][node_name].append((path, status, diagnose,
4346
                                        variants, params, api_versions))
4347
    return all_os
4348

    
4349
  def _GetQueryData(self, lu):
4350
    """Computes the list of nodes and their attributes.
4351

4352
    """
4353
    # Locking is not used
4354
    assert not (compat.any(lu.glm.is_owned(level)
4355
                           for level in locking.LEVELS
4356
                           if level != locking.LEVEL_CLUSTER) or
4357
                self.do_locking or self.use_locking)
4358

    
4359
    valid_nodes = [node.name
4360
                   for node in lu.cfg.GetAllNodesInfo().values()
4361
                   if not node.offline and node.vm_capable]
4362
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4363
    cluster = lu.cfg.GetClusterInfo()
4364

    
4365
    data = {}
4366

    
4367
    for (os_name, os_data) in pol.items():
4368
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4369
                          hidden=(os_name in cluster.hidden_os),
4370
                          blacklisted=(os_name in cluster.blacklisted_os))
4371

    
4372
      variants = set()
4373
      parameters = set()
4374
      api_versions = set()
4375

    
4376
      for idx, osl in enumerate(os_data.values()):
4377
        info.valid = bool(info.valid and osl and osl[0][1])
4378
        if not info.valid:
4379
          break
4380

    
4381
        (node_variants, node_params, node_api) = osl[0][3:6]
4382
        if idx == 0:
4383
          # First entry
4384
          variants.update(node_variants)
4385
          parameters.update(node_params)
4386
          api_versions.update(node_api)
4387
        else:
4388
          # Filter out inconsistent values
4389
          variants.intersection_update(node_variants)
4390
          parameters.intersection_update(node_params)
4391
          api_versions.intersection_update(node_api)
4392

    
4393
      info.variants = list(variants)
4394
      info.parameters = list(parameters)
4395
      info.api_versions = list(api_versions)
4396

    
4397
      data[os_name] = info
4398

    
4399
    # Prepare data in requested order
4400
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4401
            if name in data]
4402

    
4403

    
4404
class LUOsDiagnose(NoHooksLU):
4405
  """Logical unit for OS diagnose/query.
4406

4407
  """
4408
  REQ_BGL = False
4409

    
4410
  @staticmethod
4411
  def _BuildFilter(fields, names):
4412
    """Builds a filter for querying OSes.
4413

4414
    """
4415
    name_filter = qlang.MakeSimpleFilter("name", names)
4416

    
4417
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4418
    # respective field is not requested
4419
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4420
                     for fname in ["hidden", "blacklisted"]
4421
                     if fname not in fields]
4422
    if "valid" not in fields:
4423
      status_filter.append([qlang.OP_TRUE, "valid"])
4424

    
4425
    if status_filter:
4426
      status_filter.insert(0, qlang.OP_AND)
4427
    else:
4428
      status_filter = None
4429

    
4430
    if name_filter and status_filter:
4431
      return [qlang.OP_AND, name_filter, status_filter]
4432
    elif name_filter:
4433
      return name_filter
4434
    else:
4435
      return status_filter
4436

    
4437
  def CheckArguments(self):
4438
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4439
                       self.op.output_fields, False)
4440

    
4441
  def ExpandNames(self):
4442
    self.oq.ExpandNames(self)
4443

    
4444
  def Exec(self, feedback_fn):
4445
    return self.oq.OldStyleQuery(self)
4446

    
4447

    
4448
class LUNodeRemove(LogicalUnit):
4449
  """Logical unit for removing a node.
4450

4451
  """
4452
  HPATH = "node-remove"
4453
  HTYPE = constants.HTYPE_NODE
4454

    
4455
  def BuildHooksEnv(self):
4456
    """Build hooks env.
4457

4458
    This doesn't run on the target node in the pre phase as a failed
4459
    node would then be impossible to remove.
4460

4461
    """
4462
    return {
4463
      "OP_TARGET": self.op.node_name,
4464
      "NODE_NAME": self.op.node_name,
4465
      }
4466

    
4467
  def BuildHooksNodes(self):
4468
    """Build hooks nodes.
4469

4470
    """
4471
    all_nodes = self.cfg.GetNodeList()
4472
    try:
4473
      all_nodes.remove(self.op.node_name)
4474
    except ValueError:
4475
      logging.warning("Node '%s', which is about to be removed, was not found"
4476
                      " in the list of all nodes", self.op.node_name)
4477
    return (all_nodes, all_nodes)
4478

    
4479
  def CheckPrereq(self):
4480
    """Check prerequisites.
4481

4482
    This checks:
4483
     - the node exists in the configuration
4484
     - it does not have primary or secondary instances
4485
     - it's not the master
4486

4487
    Any errors are signaled by raising errors.OpPrereqError.
4488

4489
    """
4490
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4491
    node = self.cfg.GetNodeInfo(self.op.node_name)
4492
    assert node is not None
4493

    
4494
    masternode = self.cfg.GetMasterNode()
4495
    if node.name == masternode:
4496
      raise errors.OpPrereqError("Node is the master node, failover to another"
4497
                                 " node is required", errors.ECODE_INVAL)
4498

    
4499
    for instance_name, instance in self.cfg.GetAllInstancesInfo():
4500
      if node.name in instance.all_nodes:
4501
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4502
                                   " please remove first" % instance_name,
4503
                                   errors.ECODE_INVAL)
4504
    self.op.node_name = node.name
4505
    self.node = node
4506

    
4507
  def Exec(self, feedback_fn):
4508
    """Removes the node from the cluster.
4509

4510
    """
4511
    node = self.node
4512
    logging.info("Stopping the node daemon and removing configs from node %s",
4513
                 node.name)
4514

    
4515
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4516

    
4517
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4518
      "Not owning BGL"
4519

    
4520
    # Promote nodes to master candidate as needed
4521
    _AdjustCandidatePool(self, exceptions=[node.name])
4522
    self.context.RemoveNode(node.name)
4523

    
4524
    # Run post hooks on the node before it's removed
4525
    _RunPostHook(self, node.name)
4526

    
4527
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4528
    msg = result.fail_msg
4529
    if msg:
4530
      self.LogWarning("Errors encountered on the remote node while leaving"
4531
                      " the cluster: %s", msg)
4532

    
4533
    # Remove node from our /etc/hosts
4534
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4535
      master_node = self.cfg.GetMasterNode()
4536
      result = self.rpc.call_etc_hosts_modify(master_node,
4537
                                              constants.ETC_HOSTS_REMOVE,
4538
                                              node.name, None)
4539
      result.Raise("Can't update hosts file with new host data")
4540
      _RedistributeAncillaryFiles(self)
4541

    
4542

    
4543
class _NodeQuery(_QueryBase):
4544
  FIELDS = query.NODE_FIELDS
4545

    
4546
  def ExpandNames(self, lu):
4547
    lu.needed_locks = {}
4548
    lu.share_locks = _ShareAll()
4549

    
4550
    if self.names:
4551
      self.wanted = _GetWantedNodes(lu, self.names)
4552
    else:
4553
      self.wanted = locking.ALL_SET
4554

    
4555
    self.do_locking = (self.use_locking and
4556
                       query.NQ_LIVE in self.requested_data)
4557

    
4558
    if self.do_locking:
4559
      # If any non-static field is requested we need to lock the nodes
4560
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4561

    
4562
  def DeclareLocks(self, lu, level):
4563
    pass
4564

    
4565
  def _GetQueryData(self, lu):
4566
    """Computes the list of nodes and their attributes.
4567

4568
    """
4569
    all_info = lu.cfg.GetAllNodesInfo()
4570

    
4571
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4572

    
4573
    # Gather data as requested
4574
    if query.NQ_LIVE in self.requested_data:
4575
      # filter out non-vm_capable nodes
4576
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4577

    
4578
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4579
                                        lu.cfg.GetHypervisorType())
4580
      live_data = dict((name, nresult.payload)
4581
                       for (name, nresult) in node_data.items()
4582
                       if not nresult.fail_msg and nresult.payload)
4583
    else:
4584
      live_data = None
4585

    
4586
    if query.NQ_INST in self.requested_data:
4587
      node_to_primary = dict([(name, set()) for name in nodenames])
4588
      node_to_secondary = dict([(name, set()) for name in nodenames])
4589

    
4590
      inst_data = lu.cfg.GetAllInstancesInfo()
4591

    
4592
      for inst in inst_data.values():
4593
        if inst.primary_node in node_to_primary:
4594
          node_to_primary[inst.primary_node].add(inst.name)
4595
        for secnode in inst.secondary_nodes:
4596
          if secnode in node_to_secondary:
4597
            node_to_secondary[secnode].add(inst.name)
4598
    else:
4599
      node_to_primary = None
4600
      node_to_secondary = None
4601

    
4602
    if query.NQ_OOB in self.requested_data:
4603
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4604
                         for name, node in all_info.iteritems())
4605
    else:
4606
      oob_support = None
4607

    
4608
    if query.NQ_GROUP in self.requested_data:
4609
      groups = lu.cfg.GetAllNodeGroupsInfo()
4610
    else:
4611
      groups = {}
4612

    
4613
    return query.NodeQueryData([all_info[name] for name in nodenames],
4614
                               live_data, lu.cfg.GetMasterNode(),
4615
                               node_to_primary, node_to_secondary, groups,
4616
                               oob_support, lu.cfg.GetClusterInfo())
4617

    
4618

    
4619
class LUNodeQuery(NoHooksLU):
4620
  """Logical unit for querying nodes.
4621

4622
  """
4623
  # pylint: disable=W0142
4624
  REQ_BGL = False
4625

    
4626
  def CheckArguments(self):
4627
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4628
                         self.op.output_fields, self.op.use_locking)
4629

    
4630
  def ExpandNames(self):
4631
    self.nq.ExpandNames(self)
4632

    
4633
  def DeclareLocks(self, level):
4634
    self.nq.DeclareLocks(self, level)
4635

    
4636
  def Exec(self, feedback_fn):
4637
    return self.nq.OldStyleQuery(self)
4638

    
4639

    
4640
class LUNodeQueryvols(NoHooksLU):
4641
  """Logical unit for getting volumes on node(s).
4642

4643
  """
4644
  REQ_BGL = False
4645
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4646
  _FIELDS_STATIC = utils.FieldSet("node")
4647

    
4648
  def CheckArguments(self):
4649
    _CheckOutputFields(static=self._FIELDS_STATIC,
4650
                       dynamic=self._FIELDS_DYNAMIC,
4651
                       selected=self.op.output_fields)
4652

    
4653
  def ExpandNames(self):
4654
    self.share_locks = _ShareAll()
4655
    self.needed_locks = {}
4656

    
4657
    if not self.op.nodes:
4658
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4659
    else:
4660
      self.needed_locks[locking.LEVEL_NODE] = \
4661
        _GetWantedNodes(self, self.op.nodes)
4662

    
4663
  def Exec(self, feedback_fn):
4664
    """Computes the list of nodes and their attributes.
4665

4666
    """
4667
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4668
    volumes = self.rpc.call_node_volumes(nodenames)
4669

    
4670
    ilist = self.cfg.GetAllInstancesInfo()
4671
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4672

    
4673
    output = []
4674
    for node in nodenames:
4675
      nresult = volumes[node]
4676
      if nresult.offline:
4677
        continue
4678
      msg = nresult.fail_msg
4679
      if msg:
4680
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4681
        continue
4682

    
4683
      node_vols = sorted(nresult.payload,
4684
                         key=operator.itemgetter("dev"))
4685

    
4686
      for vol in node_vols:
4687
        node_output = []
4688
        for field in self.op.output_fields:
4689
          if field == "node":
4690
            val = node
4691
          elif field == "phys":
4692
            val = vol["dev"]
4693
          elif field == "vg":
4694
            val = vol["vg"]
4695
          elif field == "name":
4696
            val = vol["name"]
4697
          elif field == "size":
4698
            val = int(float(vol["size"]))
4699
          elif field == "instance":
4700
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4701
          else:
4702
            raise errors.ParameterError(field)
4703
          node_output.append(str(val))
4704

    
4705
        output.append(node_output)
4706

    
4707
    return output
4708

    
4709

    
4710
class LUNodeQueryStorage(NoHooksLU):
4711
  """Logical unit for getting information on storage units on node(s).
4712

4713
  """
4714
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4715
  REQ_BGL = False
4716

    
4717
  def CheckArguments(self):
4718
    _CheckOutputFields(static=self._FIELDS_STATIC,
4719
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4720
                       selected=self.op.output_fields)
4721

    
4722
  def ExpandNames(self):
4723
    self.share_locks = _ShareAll()
4724
    self.needed_locks = {}
4725

    
4726
    if self.op.nodes:
4727
      self.needed_locks[locking.LEVEL_NODE] = \
4728
        _GetWantedNodes(self, self.op.nodes)
4729
    else:
4730
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4731

    
4732
  def Exec(self, feedback_fn):
4733
    """Computes the list of nodes and their attributes.
4734

4735
    """
4736
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4737

    
4738
    # Always get name to sort by
4739
    if constants.SF_NAME in self.op.output_fields:
4740
      fields = self.op.output_fields[:]
4741
    else:
4742
      fields = [constants.SF_NAME] + self.op.output_fields
4743

    
4744
    # Never ask for node or type as it's only known to the LU
4745
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4746
      while extra in fields:
4747
        fields.remove(extra)
4748

    
4749
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4750
    name_idx = field_idx[constants.SF_NAME]
4751

    
4752
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4753
    data = self.rpc.call_storage_list(self.nodes,
4754
                                      self.op.storage_type, st_args,
4755
                                      self.op.name, fields)
4756

    
4757
    result = []
4758

    
4759
    for node in utils.NiceSort(self.nodes):
4760
      nresult = data[node]
4761
      if nresult.offline:
4762
        continue
4763

    
4764
      msg = nresult.fail_msg
4765
      if msg:
4766
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4767
        continue
4768

    
4769
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4770

    
4771
      for name in utils.NiceSort(rows.keys()):
4772
        row = rows[name]
4773

    
4774
        out = []
4775

    
4776
        for field in self.op.output_fields:
4777
          if field == constants.SF_NODE:
4778
            val = node
4779
          elif field == constants.SF_TYPE:
4780
            val = self.op.storage_type
4781
          elif field in field_idx:
4782
            val = row[field_idx[field]]
4783
          else:
4784
            raise errors.ParameterError(field)
4785

    
4786
          out.append(val)
4787

    
4788
        result.append(out)
4789

    
4790
    return result
4791

    
4792

    
4793
class _InstanceQuery(_QueryBase):
4794
  FIELDS = query.INSTANCE_FIELDS
4795

    
4796
  def ExpandNames(self, lu):
4797
    lu.needed_locks = {}
4798
    lu.share_locks = _ShareAll()
4799

    
4800
    if self.names:
4801
      self.wanted = _GetWantedInstances(lu, self.names)
4802
    else:
4803
      self.wanted = locking.ALL_SET
4804

    
4805
    self.do_locking = (self.use_locking and
4806
                       query.IQ_LIVE in self.requested_data)
4807
    if self.do_locking:
4808
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4809
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4810
      lu.needed_locks[locking.LEVEL_NODE] = []
4811
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4812

    
4813
    self.do_grouplocks = (self.do_locking and
4814
                          query.IQ_NODES in self.requested_data)
4815

    
4816
  def DeclareLocks(self, lu, level):
4817
    if self.do_locking:
4818
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4819
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4820

    
4821
        # Lock all groups used by instances optimistically; this requires going
4822
        # via the node before it's locked, requiring verification later on
4823
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4824
          set(group_uuid
4825
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4826
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4827
      elif level == locking.LEVEL_NODE:
4828
        lu._LockInstancesNodes() # pylint: disable=W0212
4829

    
4830
  @staticmethod
4831
  def _CheckGroupLocks(lu):
4832
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4833
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4834

    
4835
    # Check if node groups for locked instances are still correct
4836
    for instance_name in owned_instances:
4837
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4838

    
4839
  def _GetQueryData(self, lu):
4840
    """Computes the list of instances and their attributes.
4841

4842
    """
4843
    if self.do_grouplocks:
4844
      self._CheckGroupLocks(lu)
4845

    
4846
    cluster = lu.cfg.GetClusterInfo()
4847
    all_info = lu.cfg.GetAllInstancesInfo()
4848

    
4849
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4850

    
4851
    instance_list = [all_info[name] for name in instance_names]
4852
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4853
                                        for inst in instance_list)))
4854
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4855
    bad_nodes = []
4856
    offline_nodes = []
4857
    wrongnode_inst = set()
4858

    
4859
    # Gather data as requested
4860
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4861
      live_data = {}
4862
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4863
      for name in nodes:
4864
        result = node_data[name]
4865
        if result.offline:
4866
          # offline nodes will be in both lists
4867
          assert result.fail_msg
4868
          offline_nodes.append(name)
4869
        if result.fail_msg:
4870
          bad_nodes.append(name)
4871
        elif result.payload:
4872
          for inst in result.payload:
4873
            if inst in all_info:
4874
              if all_info[inst].primary_node == name:
4875
                live_data.update(result.payload)
4876
              else:
4877
                wrongnode_inst.add(inst)
4878
            else:
4879
              # orphan instance; we don't list it here as we don't
4880
              # handle this case yet in the output of instance listing
4881
              logging.warning("Orphan instance '%s' found on node %s",
4882
                              inst, name)
4883
        # else no instance is alive
4884
    else:
4885
      live_data = {}
4886

    
4887
    if query.IQ_DISKUSAGE in self.requested_data:
4888
      disk_usage = dict((inst.name,
4889
                         _ComputeDiskSize(inst.disk_template,
4890
                                          [{constants.IDISK_SIZE: disk.size}
4891
                                           for disk in inst.disks]))
4892
                        for inst in instance_list)
4893
    else:
4894
      disk_usage = None
4895

    
4896
    if query.IQ_CONSOLE in self.requested_data:
4897
      consinfo = {}
4898
      for inst in instance_list:
4899
        if inst.name in live_data:
4900
          # Instance is running
4901
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4902
        else:
4903
          consinfo[inst.name] = None
4904
      assert set(consinfo.keys()) == set(instance_names)
4905
    else:
4906
      consinfo = None
4907

    
4908
    if query.IQ_NODES in self.requested_data:
4909
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4910
                                            instance_list)))
4911
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4912
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4913
                    for uuid in set(map(operator.attrgetter("group"),
4914
                                        nodes.values())))
4915
    else:
4916
      nodes = None
4917
      groups = None
4918

    
4919
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4920
                                   disk_usage, offline_nodes, bad_nodes,
4921
                                   live_data, wrongnode_inst, consinfo,
4922
                                   nodes, groups)
4923

    
4924

    
4925
class LUQuery(NoHooksLU):
4926
  """Query for resources/items of a certain kind.
4927

4928
  """
4929
  # pylint: disable=W0142
4930
  REQ_BGL = False
4931

    
4932
  def CheckArguments(self):
4933
    qcls = _GetQueryImplementation(self.op.what)
4934

    
4935
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4936

    
4937
  def ExpandNames(self):
4938
    self.impl.ExpandNames(self)
4939

    
4940
  def DeclareLocks(self, level):
4941
    self.impl.DeclareLocks(self, level)
4942

    
4943
  def Exec(self, feedback_fn):
4944
    return self.impl.NewStyleQuery(self)
4945

    
4946

    
4947
class LUQueryFields(NoHooksLU):
4948
  """Query for resources/items of a certain kind.
4949

4950
  """
4951
  # pylint: disable=W0142
4952
  REQ_BGL = False
4953

    
4954
  def CheckArguments(self):
4955
    self.qcls = _GetQueryImplementation(self.op.what)
4956

    
4957
  def ExpandNames(self):
4958
    self.needed_locks = {}
4959

    
4960
  def Exec(self, feedback_fn):
4961
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4962

    
4963

    
4964
class LUNodeModifyStorage(NoHooksLU):
4965
  """Logical unit for modifying a storage volume on a node.
4966

4967
  """
4968
  REQ_BGL = False
4969

    
4970
  def CheckArguments(self):
4971
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4972

    
4973
    storage_type = self.op.storage_type
4974

    
4975
    try:
4976
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4977
    except KeyError:
4978
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4979
                                 " modified" % storage_type,
4980
                                 errors.ECODE_INVAL)
4981

    
4982
    diff = set(self.op.changes.keys()) - modifiable
4983
    if diff:
4984
      raise errors.OpPrereqError("The following fields can not be modified for"
4985
                                 " storage units of type '%s': %r" %
4986
                                 (storage_type, list(diff)),
4987
                                 errors.ECODE_INVAL)
4988

    
4989
  def ExpandNames(self):
4990
    self.needed_locks = {
4991
      locking.LEVEL_NODE: self.op.node_name,
4992
      }
4993

    
4994
  def Exec(self, feedback_fn):
4995
    """Computes the list of nodes and their attributes.
4996

4997
    """
4998
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4999
    result = self.rpc.call_storage_modify(self.op.node_name,
5000
                                          self.op.storage_type, st_args,
5001
                                          self.op.name, self.op.changes)
5002
    result.Raise("Failed to modify storage unit '%s' on %s" %
5003
                 (self.op.name, self.op.node_name))
5004

    
5005

    
5006
class LUNodeAdd(LogicalUnit):
5007
  """Logical unit for adding node to the cluster.
5008

5009
  """
5010
  HPATH = "node-add"
5011
  HTYPE = constants.HTYPE_NODE
5012
  _NFLAGS = ["master_capable", "vm_capable"]
5013

    
5014
  def CheckArguments(self):
5015
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5016
    # validate/normalize the node name
5017
    self.hostname = netutils.GetHostname(name=self.op.node_name,
5018
                                         family=self.primary_ip_family)
5019
    self.op.node_name = self.hostname.name
5020

    
5021
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5022
      raise errors.OpPrereqError("Cannot readd the master node",
5023
                                 errors.ECODE_STATE)
5024

    
5025
    if self.op.readd and self.op.group:
5026
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
5027
                                 " being readded", errors.ECODE_INVAL)
5028

    
5029
  def BuildHooksEnv(self):
5030
    """Build hooks env.
5031

5032
    This will run on all nodes before, and on all nodes + the new node after.
5033

5034
    """
5035
    return {
5036
      "OP_TARGET": self.op.node_name,
5037
      "NODE_NAME": self.op.node_name,
5038
      "NODE_PIP": self.op.primary_ip,
5039
      "NODE_SIP": self.op.secondary_ip,
5040
      "MASTER_CAPABLE": str(self.op.master_capable),
5041
      "VM_CAPABLE": str(self.op.vm_capable),
5042
      }
5043

    
5044
  def BuildHooksNodes(self):
5045
    """Build hooks nodes.
5046

5047
    """
5048
    # Exclude added node
5049
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5050
    post_nodes = pre_nodes + [self.op.node_name, ]
5051

    
5052
    return (pre_nodes, post_nodes)
5053

    
5054
  def CheckPrereq(self):
5055
    """Check prerequisites.
5056

5057
    This checks:
5058
     - the new node is not already in the config
5059
     - it is resolvable
5060
     - its parameters (single/dual homed) matches the cluster
5061

5062
    Any errors are signaled by raising errors.OpPrereqError.
5063

5064
    """
5065
    cfg = self.cfg
5066
    hostname = self.hostname
5067
    node = hostname.name
5068
    primary_ip = self.op.primary_ip = hostname.ip
5069
    if self.op.secondary_ip is None:
5070
      if self.primary_ip_family == netutils.IP6Address.family:
5071
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
5072
                                   " IPv4 address must be given as secondary",
5073
                                   errors.ECODE_INVAL)
5074
      self.op.secondary_ip = primary_ip
5075

    
5076
    secondary_ip = self.op.secondary_ip
5077
    if not netutils.IP4Address.IsValid(secondary_ip):
5078
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5079
                                 " address" % secondary_ip, errors.ECODE_INVAL)
5080

    
5081
    node_list = cfg.GetNodeList()
5082
    if not self.op.readd and node in node_list:
5083
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5084
                                 node, errors.ECODE_EXISTS)
5085
    elif self.op.readd and node not in node_list:
5086
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5087
                                 errors.ECODE_NOENT)
5088

    
5089
    self.changed_primary_ip = False
5090

    
5091
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5092
      if self.op.readd and node == existing_node_name:
5093
        if existing_node.secondary_ip != secondary_ip:
5094
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5095
                                     " address configuration as before",
5096
                                     errors.ECODE_INVAL)
5097
        if existing_node.primary_ip != primary_ip:
5098
          self.changed_primary_ip = True
5099

    
5100
        continue
5101

    
5102
      if (existing_node.primary_ip == primary_ip or
5103
          existing_node.secondary_ip == primary_ip or
5104
          existing_node.primary_ip == secondary_ip or
5105
          existing_node.secondary_ip == secondary_ip):
5106
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5107
                                   " existing node %s" % existing_node.name,
5108
                                   errors.ECODE_NOTUNIQUE)
5109

    
5110
    # After this 'if' block, None is no longer a valid value for the
5111
    # _capable op attributes
5112
    if self.op.readd:
5113
      old_node = self.cfg.GetNodeInfo(node)
5114
      assert old_node is not None, "Can't retrieve locked node %s" % node
5115
      for attr in self._NFLAGS:
5116
        if getattr(self.op, attr) is None:
5117
          setattr(self.op, attr, getattr(old_node, attr))
5118
    else:
5119
      for attr in self._NFLAGS:
5120
        if getattr(self.op, attr) is None:
5121
          setattr(self.op, attr, True)
5122

    
5123
    if self.op.readd and not self.op.vm_capable:
5124
      pri, sec = cfg.GetNodeInstances(node)
5125
      if pri or sec:
5126
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5127
                                   " flag set to false, but it already holds"
5128
                                   " instances" % node,
5129
                                   errors.ECODE_STATE)
5130

    
5131
    # check that the type of the node (single versus dual homed) is the
5132
    # same as for the master
5133
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5134
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5135
    newbie_singlehomed = secondary_ip == primary_ip
5136
    if master_singlehomed != newbie_singlehomed:
5137
      if master_singlehomed:
5138
        raise errors.OpPrereqError("The master has no secondary ip but the"
5139
                                   " new node has one",
5140
                                   errors.ECODE_INVAL)
5141
      else:
5142
        raise errors.OpPrereqError("The master has a secondary ip but the"
5143
                                   " new node doesn't have one",
5144
                                   errors.ECODE_INVAL)
5145

    
5146
    # checks reachability
5147
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5148
      raise errors.OpPrereqError("Node not reachable by ping",
5149
                                 errors.ECODE_ENVIRON)
5150

    
5151
    if not newbie_singlehomed:
5152
      # check reachability from my secondary ip to newbie's secondary ip
5153
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5154
                           source=myself.secondary_ip):
5155
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5156
                                   " based ping to node daemon port",
5157
                                   errors.ECODE_ENVIRON)
5158

    
5159
    if self.op.readd:
5160
      exceptions = [node]
5161
    else:
5162
      exceptions = []
5163

    
5164
    if self.op.master_capable:
5165
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5166
    else:
5167
      self.master_candidate = False
5168

    
5169
    if self.op.readd:
5170
      self.new_node = old_node
5171
    else:
5172
      node_group = cfg.LookupNodeGroup(self.op.group)
5173
      self.new_node = objects.Node(name=node,
5174
                                   primary_ip=primary_ip,
5175
                                   secondary_ip=secondary_ip,
5176
                                   master_candidate=self.master_candidate,
5177
                                   offline=False, drained=False,
5178
                                   group=node_group)
5179

    
5180
    if self.op.ndparams:
5181
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5182

    
5183
  def Exec(self, feedback_fn):
5184
    """Adds the new node to the cluster.
5185

5186
    """
5187
    new_node = self.new_node
5188
    node = new_node.name
5189

    
5190
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5191
      "Not owning BGL"
5192

    
5193
    # We adding a new node so we assume it's powered
5194
    new_node.powered = True
5195

    
5196
    # for re-adds, reset the offline/drained/master-candidate flags;
5197
    # we need to reset here, otherwise offline would prevent RPC calls
5198
    # later in the procedure; this also means that if the re-add
5199
    # fails, we are left with a non-offlined, broken node
5200
    if self.op.readd:
5201
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5202
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5203
      # if we demote the node, we do cleanup later in the procedure
5204
      new_node.master_candidate = self.master_candidate
5205
      if self.changed_primary_ip:
5206
        new_node.primary_ip = self.op.primary_ip
5207

    
5208
    # copy the master/vm_capable flags
5209
    for attr in self._NFLAGS:
5210
      setattr(new_node, attr, getattr(self.op, attr))
5211

    
5212
    # notify the user about any possible mc promotion
5213
    if new_node.master_candidate:
5214
      self.LogInfo("Node will be a master candidate")
5215

    
5216
    if self.op.ndparams:
5217
      new_node.ndparams = self.op.ndparams
5218
    else:
5219
      new_node.ndparams = {}
5220

    
5221
    # check connectivity
5222
    result = self.rpc.call_version([node])[node]
5223
    result.Raise("Can't get version information from node %s" % node)
5224
    if constants.PROTOCOL_VERSION == result.payload:
5225
      logging.info("Communication to node %s fine, sw version %s match",
5226
                   node, result.payload)
5227
    else:
5228
      raise errors.OpExecError("Version mismatch master version %s,"
5229
                               " node version %s" %
5230
                               (constants.PROTOCOL_VERSION, result.payload))
5231

    
5232
    # Add node to our /etc/hosts, and add key to known_hosts
5233
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5234
      master_node = self.cfg.GetMasterNode()
5235
      result = self.rpc.call_etc_hosts_modify(master_node,
5236
                                              constants.ETC_HOSTS_ADD,
5237
                                              self.hostname.name,
5238
                                              self.hostname.ip)
5239
      result.Raise("Can't update hosts file with new host data")
5240

    
5241
    if new_node.secondary_ip != new_node.primary_ip:
5242
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5243
                               False)
5244

    
5245
    node_verify_list = [self.cfg.GetMasterNode()]
5246
    node_verify_param = {
5247
      constants.NV_NODELIST: ([node], {}),
5248
      # TODO: do a node-net-test as well?
5249
    }
5250

    
5251
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5252
                                       self.cfg.GetClusterName())
5253
    for verifier in node_verify_list:
5254
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5255
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5256
      if nl_payload:
5257
        for failed in nl_payload:
5258
          feedback_fn("ssh/hostname verification failed"
5259
                      " (checking from %s): %s" %
5260
                      (verifier, nl_payload[failed]))
5261
        raise errors.OpExecError("ssh/hostname verification failed")
5262

    
5263
    if self.op.readd:
5264
      _RedistributeAncillaryFiles(self)
5265
      self.context.ReaddNode(new_node)
5266
      # make sure we redistribute the config
5267
      self.cfg.Update(new_node, feedback_fn)
5268
      # and make sure the new node will not have old files around
5269
      if not new_node.master_candidate:
5270
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5271
        msg = result.fail_msg
5272
        if msg:
5273
          self.LogWarning("Node failed to demote itself from master"
5274
                          " candidate status: %s" % msg)
5275
    else:
5276
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5277
                                  additional_vm=self.op.vm_capable)
5278
      self.context.AddNode(new_node, self.proc.GetECId())
5279

    
5280

    
5281
class LUNodeSetParams(LogicalUnit):
5282
  """Modifies the parameters of a node.
5283

5284
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5285
      to the node role (as _ROLE_*)
5286
  @cvar _R2F: a dictionary from node role to tuples of flags
5287
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5288

5289
  """
5290
  HPATH = "node-modify"
5291
  HTYPE = constants.HTYPE_NODE
5292
  REQ_BGL = False
5293
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5294
  _F2R = {
5295
    (True, False, False): _ROLE_CANDIDATE,
5296
    (False, True, False): _ROLE_DRAINED,
5297
    (False, False, True): _ROLE_OFFLINE,
5298
    (False, False, False): _ROLE_REGULAR,
5299
    }
5300
  _R2F = dict((v, k) for k, v in _F2R.items())
5301
  _FLAGS = ["master_candidate", "drained", "offline"]
5302

    
5303
  def CheckArguments(self):
5304
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5305
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5306
                self.op.master_capable, self.op.vm_capable,
5307
                self.op.secondary_ip, self.op.ndparams]
5308
    if all_mods.count(None) == len(all_mods):
5309
      raise errors.OpPrereqError("Please pass at least one modification",
5310
                                 errors.ECODE_INVAL)
5311
    if all_mods.count(True) > 1:
5312
      raise errors.OpPrereqError("Can't set the node into more than one"
5313
                                 " state at the same time",
5314
                                 errors.ECODE_INVAL)
5315

    
5316
    # Boolean value that tells us whether we might be demoting from MC
5317
    self.might_demote = (self.op.master_candidate == False or
5318
                         self.op.offline == True or
5319
                         self.op.drained == True or
5320
                         self.op.master_capable == False)
5321

    
5322
    if self.op.secondary_ip:
5323
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5324
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5325
                                   " address" % self.op.secondary_ip,
5326
                                   errors.ECODE_INVAL)
5327

    
5328
    self.lock_all = self.op.auto_promote and self.might_demote
5329
    self.lock_instances = self.op.secondary_ip is not None
5330

    
5331
  def _InstanceFilter(self, instance):
5332
    """Filter for getting affected instances.
5333

5334
    """
5335
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5336
            self.op.node_name in instance.all_nodes)
5337

    
5338
  def ExpandNames(self):
5339
    if self.lock_all:
5340
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5341
    else:
5342
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5343

    
5344
    # Since modifying a node can have severe effects on currently running
5345
    # operations the resource lock is at least acquired in shared mode
5346
    self.needed_locks[locking.LEVEL_NODE_RES] = \
5347
      self.needed_locks[locking.LEVEL_NODE]
5348

    
5349
    # Get node resource and instance locks in shared mode; they are not used
5350
    # for anything but read-only access
5351
    self.share_locks[locking.LEVEL_NODE_RES] = 1
5352
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5353

    
5354
    if self.lock_instances:
5355
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5356
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5357

    
5358
  def BuildHooksEnv(self):
5359
    """Build hooks env.
5360

5361
    This runs on the master node.
5362

5363
    """
5364
    return {
5365
      "OP_TARGET": self.op.node_name,
5366
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5367
      "OFFLINE": str(self.op.offline),
5368
      "DRAINED": str(self.op.drained),
5369
      "MASTER_CAPABLE": str(self.op.master_capable),
5370
      "VM_CAPABLE": str(self.op.vm_capable),
5371
      }
5372

    
5373
  def BuildHooksNodes(self):
5374
    """Build hooks nodes.
5375

5376
    """
5377
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5378
    return (nl, nl)
5379

    
5380
  def CheckPrereq(self):
5381
    """Check prerequisites.
5382

5383
    This only checks the instance list against the existing names.
5384

5385
    """
5386
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5387

    
5388
    if self.lock_instances:
5389
      affected_instances = \
5390
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5391

    
5392
      # Verify instance locks
5393
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5394
      wanted_instances = frozenset(affected_instances.keys())
5395
      if wanted_instances - owned_instances:
5396
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5397
                                   " secondary IP address have changed since"
5398
                                   " locks were acquired, wanted '%s', have"
5399
                                   " '%s'; retry the operation" %
5400
                                   (self.op.node_name,
5401
                                    utils.CommaJoin(wanted_instances),
5402
                                    utils.CommaJoin(owned_instances)),
5403
                                   errors.ECODE_STATE)
5404
    else:
5405
      affected_instances = None
5406

    
5407
    if (self.op.master_candidate is not None or
5408
        self.op.drained is not None or
5409
        self.op.offline is not None):
5410
      # we can't change the master's node flags
5411
      if self.op.node_name == self.cfg.GetMasterNode():
5412
        raise errors.OpPrereqError("The master role can be changed"
5413
                                   " only via master-failover",
5414
                                   errors.ECODE_INVAL)
5415

    
5416
    if self.op.master_candidate and not node.master_capable:
5417
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5418
                                 " it a master candidate" % node.name,
5419
                                 errors.ECODE_STATE)
5420

    
5421
    if self.op.vm_capable == False:
5422
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5423
      if ipri or isec:
5424
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5425
                                   " the vm_capable flag" % node.name,
5426
                                   errors.ECODE_STATE)
5427

    
5428
    if node.master_candidate and self.might_demote and not self.lock_all:
5429
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5430
      # check if after removing the current node, we're missing master
5431
      # candidates
5432
      (mc_remaining, mc_should, _) = \
5433
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5434
      if mc_remaining < mc_should:
5435
        raise errors.OpPrereqError("Not enough master candidates, please"
5436
                                   " pass auto promote option to allow"
5437
                                   " promotion", errors.ECODE_STATE)
5438

    
5439
    self.old_flags = old_flags = (node.master_candidate,
5440
                                  node.drained, node.offline)
5441
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5442
    self.old_role = old_role = self._F2R[old_flags]
5443

    
5444
    # Check for ineffective changes
5445
    for attr in self._FLAGS:
5446
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5447
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5448
        setattr(self.op, attr, None)
5449

    
5450
    # Past this point, any flag change to False means a transition
5451
    # away from the respective state, as only real changes are kept
5452

    
5453
    # TODO: We might query the real power state if it supports OOB
5454
    if _SupportsOob(self.cfg, node):
5455
      if self.op.offline is False and not (node.powered or
5456
                                           self.op.powered == True):
5457
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5458
                                    " offline status can be reset") %
5459
                                   self.op.node_name)
5460
    elif self.op.powered is not None:
5461
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5462
                                  " as it does not support out-of-band"
5463
                                  " handling") % self.op.node_name)
5464

    
5465
    # If we're being deofflined/drained, we'll MC ourself if needed
5466
    if (self.op.drained == False or self.op.offline == False or
5467
        (self.op.master_capable and not node.master_capable)):
5468
      if _DecideSelfPromotion(self):
5469
        self.op.master_candidate = True
5470
        self.LogInfo("Auto-promoting node to master candidate")
5471

    
5472
    # If we're no longer master capable, we'll demote ourselves from MC
5473
    if self.op.master_capable == False and node.master_candidate:
5474
      self.LogInfo("Demoting from master candidate")
5475
      self.op.master_candidate = False
5476

    
5477
    # Compute new role
5478
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5479
    if self.op.master_candidate:
5480
      new_role = self._ROLE_CANDIDATE
5481
    elif self.op.drained:
5482
      new_role = self._ROLE_DRAINED
5483
    elif self.op.offline:
5484
      new_role = self._ROLE_OFFLINE
5485
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5486
      # False is still in new flags, which means we're un-setting (the
5487
      # only) True flag
5488
      new_role = self._ROLE_REGULAR
5489
    else: # no new flags, nothing, keep old role
5490
      new_role = old_role
5491

    
5492
    self.new_role = new_role
5493

    
5494
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5495
      # Trying to transition out of offline status
5496
      # TODO: Use standard RPC runner, but make sure it works when the node is
5497
      # still marked offline
5498
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5499
      if result.fail_msg:
5500
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5501
                                   " to report its version: %s" %
5502
                                   (node.name, result.fail_msg),
5503
                                   errors.ECODE_STATE)
5504
      else:
5505
        self.LogWarning("Transitioning node from offline to online state"
5506
                        " without using re-add. Please make sure the node"
5507
                        " is healthy!")
5508

    
5509
    if self.op.secondary_ip:
5510
      # Ok even without locking, because this can't be changed by any LU
5511
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5512
      master_singlehomed = master.secondary_ip == master.primary_ip
5513
      if master_singlehomed and self.op.secondary_ip:
5514
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5515
                                   " homed cluster", errors.ECODE_INVAL)
5516

    
5517
      assert not (frozenset(affected_instances) -
5518
                  self.owned_locks(locking.LEVEL_INSTANCE))
5519

    
5520
      if node.offline:
5521
        if affected_instances:
5522
          raise errors.OpPrereqError("Cannot change secondary IP address:"
5523
                                     " offline node has instances (%s)"
5524
                                     " configured to use it" %
5525
                                     utils.CommaJoin(affected_instances.keys()))
5526
      else:
5527
        # On online nodes, check that no instances are running, and that
5528
        # the node has the new ip and we can reach it.
5529
        for instance in affected_instances.values():
5530
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
5531
                              msg="cannot change secondary ip")
5532

    
5533
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5534
        if master.name != node.name:
5535
          # check reachability from master secondary ip to new secondary ip
5536
          if not netutils.TcpPing(self.op.secondary_ip,
5537
                                  constants.DEFAULT_NODED_PORT,
5538
                                  source=master.secondary_ip):
5539
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5540
                                       " based ping to node daemon port",
5541
                                       errors.ECODE_ENVIRON)
5542

    
5543
    if self.op.ndparams:
5544
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5545
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5546
      self.new_ndparams = new_ndparams
5547

    
5548
  def Exec(self, feedback_fn):
5549
    """Modifies a node.
5550

5551
    """
5552
    node = self.node
5553
    old_role = self.old_role
5554
    new_role = self.new_role
5555

    
5556
    result = []
5557

    
5558
    if self.op.ndparams:
5559
      node.ndparams = self.new_ndparams
5560

    
5561
    if self.op.powered is not None:
5562
      node.powered = self.op.powered
5563

    
5564
    for attr in ["master_capable", "vm_capable"]:
5565
      val = getattr(self.op, attr)
5566
      if val is not None:
5567
        setattr(node, attr, val)
5568
        result.append((attr, str(val)))
5569

    
5570
    if new_role != old_role:
5571
      # Tell the node to demote itself, if no longer MC and not offline
5572
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5573
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5574
        if msg:
5575
          self.LogWarning("Node failed to demote itself: %s", msg)
5576

    
5577
      new_flags = self._R2F[new_role]
5578
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5579
        if of != nf:
5580
          result.append((desc, str(nf)))
5581
      (node.master_candidate, node.drained, node.offline) = new_flags
5582

    
5583
      # we locked all nodes, we adjust the CP before updating this node
5584
      if self.lock_all:
5585
        _AdjustCandidatePool(self, [node.name])
5586

    
5587
    if self.op.secondary_ip:
5588
      node.secondary_ip = self.op.secondary_ip
5589
      result.append(("secondary_ip", self.op.secondary_ip))
5590

    
5591
    # this will trigger configuration file update, if needed
5592
    self.cfg.Update(node, feedback_fn)
5593

    
5594
    # this will trigger job queue propagation or cleanup if the mc
5595
    # flag changed
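    # (i.e. exactly one of the old and new roles is master candidate, which
    # means the candidacy flag actually changed)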
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5597
      self.context.ReaddNode(node)
5598

    
5599
    return result
5600

    
5601

    
5602
class LUNodePowercycle(NoHooksLU):
5603
  """Powercycles a node.
5604

5605
  """
5606
  REQ_BGL = False
5607

    
5608
  def CheckArguments(self):
5609
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5610
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5611
      raise errors.OpPrereqError("The node is the master and the force"
5612
                                 " parameter was not set",
5613
                                 errors.ECODE_INVAL)
5614

    
5615
  def ExpandNames(self):
5616
    """Locking for PowercycleNode.
5617

5618
    This is a last-resort option and shouldn't block on other
5619
    jobs. Therefore, we grab no locks.
5620

5621
    """
5622
    self.needed_locks = {}
5623

    
5624
  def Exec(self, feedback_fn):
5625
    """Reboots a node.
5626

5627
    """
5628
    result = self.rpc.call_node_powercycle(self.op.node_name,
5629
                                           self.cfg.GetHypervisorType())
5630
    result.Raise("Failed to schedule the reboot")
5631
    return result.payload
5632

    
5633

    
5634
class LUClusterQuery(NoHooksLU):
5635
  """Query cluster configuration.
5636

5637
  """
5638
  REQ_BGL = False
5639

    
5640
  def ExpandNames(self):
5641
    self.needed_locks = {}
5642

    
5643
  def Exec(self, feedback_fn):
5644
    """Return cluster config.
5645

5646
    """
5647
    cluster = self.cfg.GetClusterInfo()
5648
    os_hvp = {}
5649

    
5650
    # Filter just for enabled hypervisors
5651
    for os_name, hv_dict in cluster.os_hvp.items():
5652
      os_hvp[os_name] = {}
5653
      for hv_name, hv_params in hv_dict.items():
5654
        if hv_name in cluster.enabled_hypervisors:
5655
          os_hvp[os_name][hv_name] = hv_params
5656

    
5657
    # Convert ip_family to ip_version
5658
    primary_ip_version = constants.IP4_VERSION
5659
    if cluster.primary_ip_family == netutils.IP6Address.family:
5660
      primary_ip_version = constants.IP6_VERSION
5661

    
5662
    result = {
5663
      "software_version": constants.RELEASE_VERSION,
5664
      "protocol_version": constants.PROTOCOL_VERSION,
5665
      "config_version": constants.CONFIG_VERSION,
5666
      "os_api_version": max(constants.OS_API_VERSIONS),
5667
      "export_version": constants.EXPORT_VERSION,
5668
      "architecture": (platform.architecture()[0], platform.machine()),
5669
      "name": cluster.cluster_name,
5670
      "master": cluster.master_node,
5671
      "default_hypervisor": cluster.enabled_hypervisors[0],
5672
      "enabled_hypervisors": cluster.enabled_hypervisors,
5673
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5674
                        for hypervisor_name in cluster.enabled_hypervisors]),
5675
      "os_hvp": os_hvp,
5676
      "beparams": cluster.beparams,
5677
      "osparams": cluster.osparams,
5678
      "nicparams": cluster.nicparams,
5679
      "ndparams": cluster.ndparams,
5680
      "candidate_pool_size": cluster.candidate_pool_size,
5681
      "master_netdev": cluster.master_netdev,
5682
      "master_netmask": cluster.master_netmask,
5683
      "use_external_mip_script": cluster.use_external_mip_script,
5684
      "volume_group_name": cluster.volume_group_name,
5685
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5686
      "file_storage_dir": cluster.file_storage_dir,
5687
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5688
      "maintain_node_health": cluster.maintain_node_health,
5689
      "ctime": cluster.ctime,
5690
      "mtime": cluster.mtime,
5691
      "uuid": cluster.uuid,
5692
      "tags": list(cluster.GetTags()),
5693
      "uid_pool": cluster.uid_pool,
5694
      "default_iallocator": cluster.default_iallocator,
5695
      "reserved_lvs": cluster.reserved_lvs,
5696
      "primary_ip_version": primary_ip_version,
5697
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5698
      "hidden_os": cluster.hidden_os,
5699
      "blacklisted_os": cluster.blacklisted_os,
5700
      }
5701

    
5702
    return result
5703

    
5704

    
5705
class LUClusterConfigQuery(NoHooksLU):
5706
  """Return configuration values.
5707

5708
  """
5709
  REQ_BGL = False
5710
  _FIELDS_DYNAMIC = utils.FieldSet()
5711
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5712
                                  "watcher_pause", "volume_group_name")
5713

    
5714
  def CheckArguments(self):
5715
    _CheckOutputFields(static=self._FIELDS_STATIC,
5716
                       dynamic=self._FIELDS_DYNAMIC,
5717
                       selected=self.op.output_fields)
5718

    
5719
  def ExpandNames(self):
5720
    self.needed_locks = {}
5721

    
5722
  def Exec(self, feedback_fn):
5723
    """Dump a representation of the cluster config to the standard output.
5724

5725
    """
5726
    values = []
5727
    for field in self.op.output_fields:
5728
      if field == "cluster_name":
5729
        entry = self.cfg.GetClusterName()
5730
      elif field == "master_node":
5731
        entry = self.cfg.GetMasterNode()
5732
      elif field == "drain_flag":
5733
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5734
      elif field == "watcher_pause":
5735
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5736
      elif field == "volume_group_name":
5737
        entry = self.cfg.GetVGName()
5738
      else:
5739
        raise errors.ParameterError(field)
5740
      values.append(entry)
5741
    return values
5742

    
5743

    
5744
class LUInstanceActivateDisks(NoHooksLU):
5745
  """Bring up an instance's disks.
5746

5747
  """
5748
  REQ_BGL = False
5749

    
5750
  def ExpandNames(self):
5751
    self._ExpandAndLockInstance()
5752
    self.needed_locks[locking.LEVEL_NODE] = []
5753
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5754

    
5755
  def DeclareLocks(self, level):
5756
    if level == locking.LEVEL_NODE:
5757
      self._LockInstancesNodes()
5758

    
5759
  def CheckPrereq(self):
5760
    """Check prerequisites.
5761

5762
    This checks that the instance is in the cluster.
5763

5764
    """
5765
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5766
    assert self.instance is not None, \
5767
      "Cannot retrieve locked instance %s" % self.op.instance_name
5768
    _CheckNodeOnline(self, self.instance.primary_node)
5769

    
5770
  def Exec(self, feedback_fn):
5771
    """Activate the disks.
5772

5773
    """
5774
    disks_ok, disks_info = \
5775
              _AssembleInstanceDisks(self, self.instance,
5776
                                     ignore_size=self.op.ignore_size)
5777
    if not disks_ok:
5778
      raise errors.OpExecError("Cannot activate block devices")
5779

    
5780
    return disks_info
5781

    
5782

    
5783
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5784
                           ignore_size=False):
5785
  """Prepare the block devices for an instance.
5786

5787
  This sets up the block devices on all nodes.
5788

5789
  @type lu: L{LogicalUnit}
5790
  @param lu: the logical unit on whose behalf we execute
5791
  @type instance: L{objects.Instance}
5792
  @param instance: the instance for whose disks we assemble
5793
  @type disks: list of L{objects.Disk} or None
5794
  @param disks: which disks to assemble (or all, if None)
5795
  @type ignore_secondaries: boolean
5796
  @param ignore_secondaries: if true, errors on secondary nodes
5797
      won't result in an error return from the function
5798
  @type ignore_size: boolean
5799
  @param ignore_size: if true, the current known size of the disk
5800
      will not be used during the disk activation, useful for cases
5801
      when the size is wrong
5802
  @return: False if the operation failed, otherwise a list of
5803
      (host, instance_visible_name, node_visible_name)
5804
      with the mapping from node devices to instance devices
5805

5806
  """
5807
  device_info = []
5808
  disks_ok = True
5809
  iname = instance.name
5810
  disks = _ExpandCheckDisks(instance, disks)
5811

    
5812
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it
5815

    
5816
  # The proper fix would be to wait (with some limits) until the
5817
  # connection has been made and drbd transitions from WFConnection
5818
  # into any other network-connected state (Connected, SyncTarget,
5819
  # SyncSource, etc.)
5820

    
5821
  # 1st pass, assemble on all nodes in secondary mode
5822
  for idx, inst_disk in enumerate(disks):
5823
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5824
      if ignore_size:
5825
        node_disk = node_disk.Copy()
5826
        node_disk.UnsetSize()
5827
      lu.cfg.SetDiskID(node_disk, node)
5828
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5829
      msg = result.fail_msg
5830
      if msg:
5831
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5832
                           " (is_primary=False, pass=1): %s",
5833
                           inst_disk.iv_name, node, msg)
5834
        if not ignore_secondaries:
5835
          disks_ok = False
5836

    
5837
  # FIXME: race condition on drbd migration to primary
5838

    
5839
  # 2nd pass, do only the primary node
5840
  for idx, inst_disk in enumerate(disks):
5841
    dev_path = None
5842

    
5843
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5844
      if node != instance.primary_node:
5845
        continue
5846
      if ignore_size:
5847
        node_disk = node_disk.Copy()
5848
        node_disk.UnsetSize()
5849
      lu.cfg.SetDiskID(node_disk, node)
5850
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5851
      msg = result.fail_msg
5852
      if msg:
5853
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5854
                           " (is_primary=True, pass=2): %s",
5855
                           inst_disk.iv_name, node, msg)
5856
        disks_ok = False
5857
      else:
5858
        dev_path = result.payload
5859

    
5860
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5861

    
5862
  # leave the disks configured for the primary node
5863
  # this is a workaround that would be fixed better by
5864
  # improving the logical/physical id handling
5865
  for disk in disks:
5866
    lu.cfg.SetDiskID(disk, instance.primary_node)
5867

    
5868
  return disks_ok, device_info
5869

    
5870

    
5871
def _StartInstanceDisks(lu, instance, force):
5872
  """Start the disks of an instance.
5873

5874
  """
5875
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5876
                                           ignore_secondaries=force)
5877
  if not disks_ok:
5878
    _ShutdownInstanceDisks(lu, instance)
5879
    if force is not None and not force:
5880
      lu.proc.LogWarning("", hint="If the message above refers to a"
5881
                         " secondary node,"
5882
                         " you can retry the operation using '--force'.")
5883
    raise errors.OpExecError("Disk consistency error")
5884

    
5885

    
5886
class LUInstanceDeactivateDisks(NoHooksLU):
5887
  """Shutdown an instance's disks.
5888

5889
  """
5890
  REQ_BGL = False
5891

    
5892
  def ExpandNames(self):
5893
    self._ExpandAndLockInstance()
5894
    self.needed_locks[locking.LEVEL_NODE] = []
5895
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5896

    
5897
  def DeclareLocks(self, level):
5898
    if level == locking.LEVEL_NODE:
5899
      self._LockInstancesNodes()
5900

    
5901
  def CheckPrereq(self):
5902
    """Check prerequisites.
5903

5904
    This checks that the instance is in the cluster.
5905

5906
    """
5907
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5908
    assert self.instance is not None, \
5909
      "Cannot retrieve locked instance %s" % self.op.instance_name
5910

    
5911
  def Exec(self, feedback_fn):
5912
    """Deactivate the disks
5913

5914
    """
5915
    instance = self.instance
5916
    if self.op.force:
5917
      _ShutdownInstanceDisks(self, instance)
5918
    else:
5919
      _SafeShutdownInstanceDisks(self, instance)
5920

    
5921

    
5922
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5923
  """Shutdown block devices of an instance.
5924

5925
  This function checks if an instance is running, before calling
5926
  _ShutdownInstanceDisks.
5927

5928
  """
5929
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5930
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5931

    
5932

    
5933
def _ExpandCheckDisks(instance, disks):
5934
  """Return the instance disks selected by the disks list
5935

5936
  @type disks: list of L{objects.Disk} or None
5937
  @param disks: selected disks
5938
  @rtype: list of L{objects.Disk}
5939
  @return: selected instance disks to act on
5940

5941
  """
5942
  if disks is None:
5943
    return instance.disks
5944
  else:
5945
    if not set(disks).issubset(instance.disks):
5946
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5947
                                   " target instance")
5948
    return disks
5949

    
5950

    
5951
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  otherwise they cause the shutdown to be reported as failed.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
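        # Only treat this as a failure if it happened on the primary node and
        # primary errors are not ignored, or on a secondary node that is not
        # marked offline.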
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result

    
5976

    
5977
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5978
  """Checks if a node has enough free memory.
5979

5980
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5984

5985
  @type lu: C{LogicalUnit}
5986
  @param lu: a logical unit from which we get configuration data
5987
  @type node: C{str}
5988
  @param node: the node to check
5989
  @type reason: C{str}
5990
  @param reason: string to use in the error message
5991
  @type requested: C{int}
5992
  @param requested: the amount of memory in MiB to check for
5993
  @type hypervisor_name: C{str}
5994
  @param hypervisor_name: the hypervisor to ask for memory stats
5995
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5996
      we cannot check the node
5997

5998
  """
5999
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
6000
  nodeinfo[node].Raise("Can't get data from node %s" % node,
6001
                       prereq=True, ecode=errors.ECODE_ENVIRON)
6002
  free_mem = nodeinfo[node].payload.get("memory_free", None)
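  # A missing or non-integer "memory_free" value means the node could not
  # report its free memory, which is treated as a prerequisite failure below.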
  if not isinstance(free_mem, int):
6004
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6005
                               " was '%s'" % (node, free_mem),
6006
                               errors.ECODE_ENVIRON)
6007
  if requested > free_mem:
6008
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6009
                               " needed %s MiB, available %s MiB" %
6010
                               (node, reason, requested, free_mem),
6011
                               errors.ECODE_NORES)
6012

    
6013

    
6014
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6015
  """Checks if nodes have enough free disk space in the all VGs.
6016

6017
  This function check if all given nodes have the needed amount of
6018
  free disk. In case any node has less disk or we cannot get the
6019
  information from the node, this function raise an OpPrereqError
6020
  exception.
6021

6022
  @type lu: C{LogicalUnit}
6023
  @param lu: a logical unit from which we get configuration data
6024
  @type nodenames: C{list}
6025
  @param nodenames: the list of node names to check
6026
  @type req_sizes: C{dict}
6027
  @param req_sizes: the hash of vg and corresponding amount of disk in
6028
      MiB to check for
6029
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6030
      or we cannot check the node
6031

6032
  """
6033
  for vg, req_size in req_sizes.items():
6034
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6035

    
6036

    
6037
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6038
  """Checks if nodes have enough free disk space in the specified VG.
6039

6040
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
6044

6045
  @type lu: C{LogicalUnit}
6046
  @param lu: a logical unit from which we get configuration data
6047
  @type nodenames: C{list}
6048
  @param nodenames: the list of node names to check
6049
  @type vg: C{str}
6050
  @param vg: the volume group to check
6051
  @type requested: C{int}
6052
  @param requested: the amount of disk in MiB to check for
6053
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6054
      or we cannot check the node
6055

6056
  """
6057
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6058
  for node in nodenames:
6059
    info = nodeinfo[node]
6060
    info.Raise("Cannot get current information from node %s" % node,
6061
               prereq=True, ecode=errors.ECODE_ENVIRON)
6062
    vg_free = info.payload.get("vg_free", None)
6063
    if not isinstance(vg_free, int):
6064
      raise errors.OpPrereqError("Can't compute free disk space on node"
6065
                                 " %s for vg %s, result was '%s'" %
6066
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6067
    if requested > vg_free:
6068
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6069
                                 " vg %s: required %d MiB, available %d MiB" %
6070
                                 (node, vg, requested, vg_free),
6071
                                 errors.ECODE_NORES)
6072

    
6073

    
6074
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6075
  """Checks if nodes have enough physical CPUs
6076

6077
  This function checks if all given nodes have the needed number of
6078
  physical CPUs. In case any node has less CPUs or we cannot get the
6079
  information from the node, this function raises an OpPrereqError
6080
  exception.
6081

6082
  @type lu: C{LogicalUnit}
6083
  @param lu: a logical unit from which we get configuration data
6084
  @type nodenames: C{list}
6085
  @param nodenames: the list of node names to check
6086
  @type requested: C{int}
6087
  @param requested: the minimum acceptable number of physical CPUs
6088
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6089
      or we cannot check the node
6090

6091
  """
6092
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6093
  for node in nodenames:
6094
    info = nodeinfo[node]
6095
    info.Raise("Cannot get current information from node %s" % node,
6096
               prereq=True, ecode=errors.ECODE_ENVIRON)
6097
    num_cpus = info.payload.get("cpu_total", None)
6098
    if not isinstance(num_cpus, int):
6099
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6100
                                 " on node %s, result was '%s'" %
6101
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6102
    if requested > num_cpus:
6103
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6104
                                 "required" % (node, num_cpus, requested),
6105
                                 errors.ECODE_NORES)
6106

    
6107

    
6108
class LUInstanceStartup(LogicalUnit):
6109
  """Starts an instance.
6110

6111
  """
6112
  HPATH = "instance-start"
6113
  HTYPE = constants.HTYPE_INSTANCE
6114
  REQ_BGL = False
6115

    
6116
  def CheckArguments(self):
6117
    # extra beparams
6118
    if self.op.beparams:
6119
      # fill the beparams dict
6120
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6121

    
6122
  def ExpandNames(self):
6123
    self._ExpandAndLockInstance()
6124

    
6125
  def BuildHooksEnv(self):
6126
    """Build hooks env.
6127

6128
    This runs on master, primary and secondary nodes of the instance.
6129

6130
    """
6131
    env = {
6132
      "FORCE": self.op.force,
6133
      }
6134

    
6135
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6136

    
6137
    return env
6138

    
6139
  def BuildHooksNodes(self):
6140
    """Build hooks nodes.
6141

6142
    """
6143
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6144
    return (nl, nl)
6145

    
6146
  def CheckPrereq(self):
6147
    """Check prerequisites.
6148

6149
    This checks that the instance is in the cluster.
6150

6151
    """
6152
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6153
    assert self.instance is not None, \
6154
      "Cannot retrieve locked instance %s" % self.op.instance_name
6155

    
6156
    # extra hvparams
6157
    if self.op.hvparams:
6158
      # check hypervisor parameter syntax (locally)
6159
      cluster = self.cfg.GetClusterInfo()
6160
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6161
      filled_hvp = cluster.FillHV(instance)
6162
      filled_hvp.update(self.op.hvparams)
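      # the one-off parameters given for this start are checked on top of the
      # instance's fully expanded hypervisor parameters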
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6164
      hv_type.CheckParameterSyntax(filled_hvp)
6165
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6166

    
6167
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6168

    
6169
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6170

    
6171
    if self.primary_offline and self.op.ignore_offline_nodes:
6172
      self.proc.LogWarning("Ignoring offline primary node")
6173

    
6174
      if self.op.hvparams or self.op.beparams:
6175
        self.proc.LogWarning("Overridden parameters are ignored")
6176
    else:
6177
      _CheckNodeOnline(self, instance.primary_node)
6178

    
6179
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6180

    
6181
      # check bridges existence
6182
      _CheckInstanceBridgesExist(self, instance)
6183

    
6184
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6185
                                                instance.name,
6186
                                                instance.hypervisor)
6187
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6188
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6189
      if not remote_info.payload: # not running already
6190
        _CheckNodeFreeMemory(self, instance.primary_node,
6191
                             "starting instance %s" % instance.name,
6192
                             bep[constants.BE_MEMORY], instance.hypervisor)
6193

    
6194
  def Exec(self, feedback_fn):
6195
    """Start the instance.
6196

6197
    """
6198
    instance = self.instance
6199
    force = self.op.force
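    # Record the new desired state in the configuration first; unless
    # no_remember was requested, the instance is considered administratively
    # "up" even if the actual startup below fails.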
    if not self.op.no_remember:
6202
      self.cfg.MarkInstanceUp(instance.name)
6203

    
6204
    if self.primary_offline:
6205
      assert self.op.ignore_offline_nodes
6206
      self.proc.LogInfo("Primary node offline, marked instance as started")
6207
    else:
6208
      node_current = instance.primary_node
6209

    
6210
      _StartInstanceDisks(self, instance, force)
6211

    
6212
      result = \
6213
        self.rpc.call_instance_start(node_current,
6214
                                     (instance, self.op.hvparams,
6215
                                      self.op.beparams),
6216
                                     self.op.startup_paused)
6217
      msg = result.fail_msg
6218
      if msg:
6219
        _ShutdownInstanceDisks(self, instance)
6220
        raise errors.OpExecError("Could not start instance: %s" % msg)
6221

    
6222

    
6223
class LUInstanceReboot(LogicalUnit):
6224
  """Reboot an instance.
6225

6226
  """
6227
  HPATH = "instance-reboot"
6228
  HTYPE = constants.HTYPE_INSTANCE
6229
  REQ_BGL = False
6230

    
6231
  def ExpandNames(self):
6232
    self._ExpandAndLockInstance()
6233

    
6234
  def BuildHooksEnv(self):
6235
    """Build hooks env.
6236

6237
    This runs on master, primary and secondary nodes of the instance.
6238

6239
    """
6240
    env = {
6241
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6242
      "REBOOT_TYPE": self.op.reboot_type,
6243
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6244
      }
6245

    
6246
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6247

    
6248
    return env
6249

    
6250
  def BuildHooksNodes(self):
6251
    """Build hooks nodes.
6252

6253
    """
6254
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6255
    return (nl, nl)
6256

    
6257
  def CheckPrereq(self):
6258
    """Check prerequisites.
6259

6260
    This checks that the instance is in the cluster.
6261

6262
    """
6263
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6264
    assert self.instance is not None, \
6265
      "Cannot retrieve locked instance %s" % self.op.instance_name
6266
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6267
    _CheckNodeOnline(self, instance.primary_node)
6268

    
6269
    # check bridges existence
6270
    _CheckInstanceBridgesExist(self, instance)
6271

    
6272
  def Exec(self, feedback_fn):
6273
    """Reboot the instance.
6274

6275
    """
6276
    instance = self.instance
6277
    ignore_secondaries = self.op.ignore_secondaries
6278
    reboot_type = self.op.reboot_type
6279

    
6280
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6281
                                              instance.name,
6282
                                              instance.hypervisor)
6283
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6284
    instance_running = bool(remote_info.payload)
6285

    
6286
    node_current = instance.primary_node
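    # Soft and hard reboots of a running instance are handled directly on the
    # primary node; any other case falls through to a full shutdown and
    # restart below.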
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6289
                                            constants.INSTANCE_REBOOT_HARD]:
6290
      for disk in instance.disks:
6291
        self.cfg.SetDiskID(disk, node_current)
6292
      result = self.rpc.call_instance_reboot(node_current, instance,
6293
                                             reboot_type,
6294
                                             self.op.shutdown_timeout)
6295
      result.Raise("Could not reboot instance")
6296
    else:
6297
      if instance_running:
6298
        result = self.rpc.call_instance_shutdown(node_current, instance,
6299
                                                 self.op.shutdown_timeout)
6300
        result.Raise("Could not shutdown instance for full reboot")
6301
        _ShutdownInstanceDisks(self, instance)
6302
      else:
6303
        self.LogInfo("Instance %s was already stopped, starting now",
6304
                     instance.name)
6305
      _StartInstanceDisks(self, instance, ignore_secondaries)
6306
      result = self.rpc.call_instance_start(node_current,
6307
                                            (instance, None, None), False)
6308
      msg = result.fail_msg
6309
      if msg:
6310
        _ShutdownInstanceDisks(self, instance)
6311
        raise errors.OpExecError("Could not start instance for"
6312
                                 " full reboot: %s" % msg)
6313

    
6314
    self.cfg.MarkInstanceUp(instance.name)
6315

    
6316

    
6317
class LUInstanceShutdown(LogicalUnit):
6318
  """Shutdown an instance.
6319

6320
  """
6321
  HPATH = "instance-stop"
6322
  HTYPE = constants.HTYPE_INSTANCE
6323
  REQ_BGL = False
6324

    
6325
  def ExpandNames(self):
6326
    self._ExpandAndLockInstance()
6327

    
6328
  def BuildHooksEnv(self):
6329
    """Build hooks env.
6330

6331
    This runs on master, primary and secondary nodes of the instance.
6332

6333
    """
6334
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6335
    env["TIMEOUT"] = self.op.timeout
6336
    return env
6337

    
6338
  def BuildHooksNodes(self):
6339
    """Build hooks nodes.
6340

6341
    """
6342
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6343
    return (nl, nl)
6344

    
6345
  def CheckPrereq(self):
6346
    """Check prerequisites.
6347

6348
    This checks that the instance is in the cluster.
6349

6350
    """
6351
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6352
    assert self.instance is not None, \
6353
      "Cannot retrieve locked instance %s" % self.op.instance_name
6354

    
6355
    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6356

    
6357
    self.primary_offline = \
6358
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6359

    
6360
    if self.primary_offline and self.op.ignore_offline_nodes:
6361
      self.proc.LogWarning("Ignoring offline primary node")
6362
    else:
6363
      _CheckNodeOnline(self, self.instance.primary_node)
6364

    
6365
  def Exec(self, feedback_fn):
6366
    """Shutdown the instance.
6367

6368
    """
6369
    instance = self.instance
6370
    node_current = instance.primary_node
6371
    timeout = self.op.timeout
6372

    
6373
    if not self.op.no_remember:
6374
      self.cfg.MarkInstanceDown(instance.name)
6375

    
6376
    if self.primary_offline:
6377
      assert self.op.ignore_offline_nodes
6378
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6379
    else:
6380
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6381
      msg = result.fail_msg
6382
      if msg:
6383
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6384

    
6385
      _ShutdownInstanceDisks(self, instance)
6386

    
6387

    
6388
class LUInstanceReinstall(LogicalUnit):
6389
  """Reinstall an instance.
6390

6391
  """
6392
  HPATH = "instance-reinstall"
6393
  HTYPE = constants.HTYPE_INSTANCE
6394
  REQ_BGL = False
6395

    
6396
  def ExpandNames(self):
6397
    self._ExpandAndLockInstance()
6398

    
6399
  def BuildHooksEnv(self):
6400
    """Build hooks env.
6401

6402
    This runs on master, primary and secondary nodes of the instance.
6403

6404
    """
6405
    return _BuildInstanceHookEnvByObject(self, self.instance)
6406

    
6407
  def BuildHooksNodes(self):
6408
    """Build hooks nodes.
6409

6410
    """
6411
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6412
    return (nl, nl)
6413

    
6414
  def CheckPrereq(self):
6415
    """Check prerequisites.
6416

6417
    This checks that the instance is in the cluster and is not running.
6418

6419
    """
6420
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6421
    assert instance is not None, \
6422
      "Cannot retrieve locked instance %s" % self.op.instance_name
6423
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6424
                     " offline, cannot reinstall")
6425
    for node in instance.secondary_nodes:
6426
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6427
                       " cannot reinstall")
6428

    
6429
    if instance.disk_template == constants.DT_DISKLESS:
6430
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6431
                                 self.op.instance_name,
6432
                                 errors.ECODE_INVAL)
6433
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6434

    
6435
    if self.op.os_type is not None:
6436
      # OS verification
6437
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6438
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6439
      instance_os = self.op.os_type
6440
    else:
6441
      instance_os = instance.os
6442

    
6443
    nodelist = list(instance.all_nodes)
6444

    
6445
    if self.op.osparams:
6446
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6447
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6448
      self.os_inst = i_osdict # the new dict (without defaults)
6449
    else:
6450
      self.os_inst = None
6451

    
6452
    self.instance = instance
6453

    
6454
  def Exec(self, feedback_fn):
6455
    """Reinstall the instance.
6456

6457
    """
6458
    inst = self.instance
6459

    
6460
    if self.op.os_type is not None:
6461
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6462
      inst.os = self.op.os_type
6463
      # Write to configuration
6464
      self.cfg.Update(inst, feedback_fn)
6465

    
6466
    _StartInstanceDisks(self, inst, None)
6467
    try:
6468
      feedback_fn("Running the instance OS create scripts...")
6469
      # FIXME: pass debug option from opcode to backend
6470
      result = self.rpc.call_instance_os_add(inst.primary_node,
6471
                                             (inst, self.os_inst), True,
6472
                                             self.op.debug_level)
6473
      result.Raise("Could not install OS for instance %s on node %s" %
6474
                   (inst.name, inst.primary_node))
6475
    finally:
6476
      _ShutdownInstanceDisks(self, inst)
6477

    
6478

    
6479
class LUInstanceRecreateDisks(LogicalUnit):
6480
  """Recreate an instance's missing disks.
6481

6482
  """
6483
  HPATH = "instance-recreate-disks"
6484
  HTYPE = constants.HTYPE_INSTANCE
6485
  REQ_BGL = False
6486

    
6487
  def CheckArguments(self):
6488
    # normalise the disk list
6489
    self.op.disks = sorted(frozenset(self.op.disks))
6490

    
6491
  def ExpandNames(self):
6492
    self._ExpandAndLockInstance()
6493
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6494
    if self.op.nodes:
6495
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6496
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6497
    else:
6498
      self.needed_locks[locking.LEVEL_NODE] = []
6499

    
6500
  def DeclareLocks(self, level):
6501
    if level == locking.LEVEL_NODE:
6502
      # if we replace the nodes, we only need to lock the old primary,
6503
      # otherwise we need to lock all nodes for disk re-creation
6504
      primary_only = bool(self.op.nodes)
6505
      self._LockInstancesNodes(primary_only=primary_only)
6506
    elif level == locking.LEVEL_NODE_RES:
6507
      # Copy node locks
6508
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6509
        self.needed_locks[locking.LEVEL_NODE][:]
6510

    
6511
  def BuildHooksEnv(self):
6512
    """Build hooks env.
6513

6514
    This runs on master, primary and secondary nodes of the instance.
6515

6516
    """
6517
    return _BuildInstanceHookEnvByObject(self, self.instance)
6518

    
6519
  def BuildHooksNodes(self):
6520
    """Build hooks nodes.
6521

6522
    """
6523
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6524
    return (nl, nl)
6525

    
6526
  def CheckPrereq(self):
6527
    """Check prerequisites.
6528

6529
    This checks that the instance is in the cluster and is not running.
6530

6531
    """
6532
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6533
    assert instance is not None, \
6534
      "Cannot retrieve locked instance %s" % self.op.instance_name
6535
    if self.op.nodes:
6536
      if len(self.op.nodes) != len(instance.all_nodes):
6537
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6538
                                   " %d replacement nodes were specified" %
6539
                                   (instance.name, len(instance.all_nodes),
6540
                                    len(self.op.nodes)),
6541
                                   errors.ECODE_INVAL)
6542
      assert instance.disk_template != constants.DT_DRBD8 or \
6543
          len(self.op.nodes) == 2
6544
      assert instance.disk_template != constants.DT_PLAIN or \
6545
          len(self.op.nodes) == 1
6546
      primary_node = self.op.nodes[0]
6547
    else:
6548
      primary_node = instance.primary_node
6549
    _CheckNodeOnline(self, primary_node)
6550

    
6551
    if instance.disk_template == constants.DT_DISKLESS:
6552
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6553
                                 self.op.instance_name, errors.ECODE_INVAL)
6554
    # if we replace nodes *and* the old primary is offline, we don't
6555
    # check
6556
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6557
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6558
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6559
    if not (self.op.nodes and old_pnode.offline):
6560
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6561
                          msg="cannot recreate disks")
6562

    
6563
    if not self.op.disks:
6564
      self.op.disks = range(len(instance.disks))
6565
    else:
6566
      for idx in self.op.disks:
6567
        if idx >= len(instance.disks):
6568
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6569
                                     errors.ECODE_INVAL)
6570
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6571
      raise errors.OpPrereqError("Can't recreate disks partially and"
6572
                                 " change the nodes at the same time",
6573
                                 errors.ECODE_INVAL)
6574
    self.instance = instance
6575

    
6576
  def Exec(self, feedback_fn):
6577
    """Recreate the disks.
6578

6579
    """
6580
    instance = self.instance
6581

    
6582
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6583
            self.owned_locks(locking.LEVEL_NODE_RES))
6584

    
6585
    to_skip = []
6586
    mods = [] # keeps track of needed logical_id changes
6587

    
6588
    for idx, disk in enumerate(instance.disks):
6589
      if idx not in self.op.disks: # disk idx has not been passed in
6590
        to_skip.append(idx)
6591
        continue
6592
      # update secondaries for disks, if needed
6593
      if self.op.nodes:
6594
        if disk.dev_type == constants.LD_DRBD8:
6595
          # need to update the nodes and minors
6596
          assert len(self.op.nodes) == 2
6597
          assert len(disk.logical_id) == 6 # otherwise disk internals
6598
                                           # have changed
6599
          (_, _, old_port, _, _, old_secret) = disk.logical_id
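          # A DRBD8 logical_id is (node_a, node_b, port, minor_a, minor_b,
          # secret); keep the port and secret but allocate fresh minors on
          # the replacement nodes.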
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6601
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6602
                    new_minors[0], new_minors[1], old_secret)
6603
          assert len(disk.logical_id) == len(new_id)
6604
          mods.append((idx, new_id))
6605

    
6606
    # now that we have passed all asserts above, we can apply the mods
6607
    # in a single run (to avoid partial changes)
6608
    for idx, new_id in mods:
6609
      instance.disks[idx].logical_id = new_id
6610

    
6611
    # change primary node, if needed
6612
    if self.op.nodes:
6613
      instance.primary_node = self.op.nodes[0]
6614
      self.LogWarning("Changing the instance's nodes, you will have to"
6615
                      " remove any disks left on the older nodes manually")
6616

    
6617
    if self.op.nodes:
6618
      self.cfg.Update(instance, feedback_fn)
6619

    
6620
    _CreateDisks(self, instance, to_skip=to_skip)
6621

    
6622

    
6623
class LUInstanceRename(LogicalUnit):
6624
  """Rename an instance.
6625

6626
  """
6627
  HPATH = "instance-rename"
6628
  HTYPE = constants.HTYPE_INSTANCE
6629

    
6630
  def CheckArguments(self):
6631
    """Check arguments.
6632

6633
    """
6634
    if self.op.ip_check and not self.op.name_check:
6635
      # TODO: make the ip check more flexible and not depend on the name check
6636
      raise errors.OpPrereqError("IP address check requires a name check",
6637
                                 errors.ECODE_INVAL)
6638

    
6639
  def BuildHooksEnv(self):
6640
    """Build hooks env.
6641

6642
    This runs on master, primary and secondary nodes of the instance.
6643

6644
    """
6645
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6646
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6647
    return env
6648

    
6649
  def BuildHooksNodes(self):
6650
    """Build hooks nodes.
6651

6652
    """
6653
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6654
    return (nl, nl)
6655

    
6656
  def CheckPrereq(self):
6657
    """Check prerequisites.
6658

6659
    This checks that the instance is in the cluster and is not running.
6660

6661
    """
6662
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6663
                                                self.op.instance_name)
6664
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6665
    assert instance is not None
6666
    _CheckNodeOnline(self, instance.primary_node)
6667
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6668
                        msg="cannot rename")
6669
    self.instance = instance
6670

    
6671
    new_name = self.op.new_name
6672
    if self.op.name_check:
6673
      hostname = netutils.GetHostname(name=new_name)
6674
      if hostname != new_name:
6675
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6676
                     hostname.name)
6677
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6678
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6679
                                    " same as given hostname '%s'") %
6680
                                    (hostname.name, self.op.new_name),
6681
                                    errors.ECODE_INVAL)
6682
      new_name = self.op.new_name = hostname.name
6683
      if (self.op.ip_check and
6684
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6685
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6686
                                   (hostname.ip, new_name),
6687
                                   errors.ECODE_NOTUNIQUE)
6688

    
6689
    instance_list = self.cfg.GetInstanceList()
6690
    if new_name in instance_list and new_name != instance.name:
6691
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6692
                                 new_name, errors.ECODE_EXISTS)
6693

    
6694
  def Exec(self, feedback_fn):
6695
    """Rename the instance.
6696

6697
    """
6698
    inst = self.instance
6699
    old_name = inst.name
6700

    
6701
    rename_file_storage = False
6702
    if (inst.disk_template in constants.DTS_FILEBASED and
6703
        self.op.new_name != inst.name):
6704
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6705
      rename_file_storage = True
6706

    
6707
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6708
    # Change the instance lock. This is definitely safe while we hold the BGL.
6709
    # Otherwise the new lock would have to be added in acquired mode.
6710
    assert self.REQ_BGL
6711
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6712
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6713

    
6714
    # re-read the instance from the configuration after rename
6715
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6716

    
6717
    if rename_file_storage:
6718
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6719
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6720
                                                     old_file_storage_dir,
6721
                                                     new_file_storage_dir)
6722
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6723
                   " (but the instance has been renamed in Ganeti)" %
6724
                   (inst.primary_node, old_file_storage_dir,
6725
                    new_file_storage_dir))
6726

    
6727
    _StartInstanceDisks(self, inst, None)
6728
    try:
6729
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6730
                                                 old_name, self.op.debug_level)
6731
      msg = result.fail_msg
6732
      if msg:
6733
        msg = ("Could not run OS rename script for instance %s on node %s"
6734
               " (but the instance has been renamed in Ganeti): %s" %
6735
               (inst.name, inst.primary_node, msg))
6736
        self.proc.LogWarning(msg)
6737
    finally:
6738
      _ShutdownInstanceDisks(self, inst)
6739

    
6740
    return inst.name
6741

    
6742

    
6743
class LUInstanceRemove(LogicalUnit):
6744
  """Remove an instance.
6745

6746
  """
6747
  HPATH = "instance-remove"
6748
  HTYPE = constants.HTYPE_INSTANCE
6749
  REQ_BGL = False
6750

    
6751
  def ExpandNames(self):
6752
    self._ExpandAndLockInstance()
6753
    self.needed_locks[locking.LEVEL_NODE] = []
6754
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6755
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6756

    
6757
  def DeclareLocks(self, level):
6758
    if level == locking.LEVEL_NODE:
6759
      self._LockInstancesNodes()
6760
    elif level == locking.LEVEL_NODE_RES:
6761
      # Copy node locks
6762
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6763
        self.needed_locks[locking.LEVEL_NODE][:]
6764

    
6765
  def BuildHooksEnv(self):
6766
    """Build hooks env.
6767

6768
    This runs on master, primary and secondary nodes of the instance.
6769

6770
    """
6771
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6772
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6773
    return env
6774

    
6775
  def BuildHooksNodes(self):
6776
    """Build hooks nodes.
6777

6778
    """
6779
    nl = [self.cfg.GetMasterNode()]
6780
    nl_post = list(self.instance.all_nodes) + nl
6781
    return (nl, nl_post)
6782

    
6783
  def CheckPrereq(self):
6784
    """Check prerequisites.
6785

6786
    This checks that the instance is in the cluster.
6787

6788
    """
6789
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6790
    assert self.instance is not None, \
6791
      "Cannot retrieve locked instance %s" % self.op.instance_name
6792

    
6793
  def Exec(self, feedback_fn):
6794
    """Remove the instance.
6795

6796
    """
6797
    instance = self.instance
6798
    logging.info("Shutting down instance %s on node %s",
6799
                 instance.name, instance.primary_node)
6800

    
6801
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6802
                                             self.op.shutdown_timeout)
6803
    msg = result.fail_msg
6804
    if msg:
6805
      if self.op.ignore_failures:
6806
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6807
      else:
6808
        raise errors.OpExecError("Could not shutdown instance %s on"
6809
                                 " node %s: %s" %
6810
                                 (instance.name, instance.primary_node, msg))
6811

    
6812
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6813
            self.owned_locks(locking.LEVEL_NODE_RES))
6814
    assert not (set(instance.all_nodes) -
6815
                self.owned_locks(locking.LEVEL_NODE)), \
6816
      "Not owning correct locks"
6817

    
6818
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6819

    
6820

    
6821
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6822
  """Utility function to remove an instance.
6823

6824
  """
6825
  logging.info("Removing block devices for instance %s", instance.name)
6826

    
6827
  if not _RemoveDisks(lu, instance):
6828
    if not ignore_failures:
6829
      raise errors.OpExecError("Can't remove instance's disks")
6830
    feedback_fn("Warning: can't remove instance's disks")
6831

    
6832
  logging.info("Removing instance %s out of cluster config", instance.name)
6833

    
6834
  lu.cfg.RemoveInstance(instance.name)
6835

    
6836
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6837
    "Instance lock removal conflict"
6838

    
6839
  # Remove lock for the instance
6840
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6841

    
6842

    
6843
class LUInstanceQuery(NoHooksLU):
6844
  """Logical unit for querying instances.
6845

6846
  """
6847
  # pylint: disable=W0142
6848
  REQ_BGL = False
6849

    
6850
  def CheckArguments(self):
6851
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6852
                             self.op.output_fields, self.op.use_locking)
6853

    
6854
  def ExpandNames(self):
6855
    self.iq.ExpandNames(self)
6856

    
6857
  def DeclareLocks(self, level):
6858
    self.iq.DeclareLocks(self, level)
6859

    
6860
  def Exec(self, feedback_fn):
6861
    return self.iq.OldStyleQuery(self)
6862

    
6863

    
6864
class LUInstanceFailover(LogicalUnit):
6865
  """Failover an instance.
6866

6867
  """
6868
  HPATH = "instance-failover"
6869
  HTYPE = constants.HTYPE_INSTANCE
6870
  REQ_BGL = False
6871

    
6872
  def CheckArguments(self):
6873
    """Check the arguments.
6874

6875
    """
6876
    self.iallocator = getattr(self.op, "iallocator", None)
6877
    self.target_node = getattr(self.op, "target_node", None)
6878

    
6879
  def ExpandNames(self):
6880
    self._ExpandAndLockInstance()
6881

    
6882
    if self.op.target_node is not None:
6883
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6884

    
6885
    self.needed_locks[locking.LEVEL_NODE] = []
6886
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6887

    
6888
    ignore_consistency = self.op.ignore_consistency
6889
    shutdown_timeout = self.op.shutdown_timeout
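    # Failover reuses the migration tasklet; failover=True makes it shut the
    # instance down and restart it on the target node instead of performing
    # a live migration.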
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6891
                                       cleanup=False,
6892
                                       failover=True,
6893
                                       ignore_consistency=ignore_consistency,
6894
                                       shutdown_timeout=shutdown_timeout)
6895
    self.tasklets = [self._migrater]
6896

    
6897
  def DeclareLocks(self, level):
6898
    if level == locking.LEVEL_NODE:
6899
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6900
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6901
        if self.op.target_node is None:
6902
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6903
        else:
6904
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6905
                                                   self.op.target_node]
6906
        del self.recalculate_locks[locking.LEVEL_NODE]
6907
      else:
6908
        self._LockInstancesNodes()
6909

    
6910
  def BuildHooksEnv(self):
6911
    """Build hooks env.
6912

6913
    This runs on master, primary and secondary nodes of the instance.
6914

6915
    """
6916
    instance = self._migrater.instance
6917
    source_node = instance.primary_node
6918
    target_node = self.op.target_node
6919
    env = {
6920
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6921
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6922
      "OLD_PRIMARY": source_node,
6923
      "NEW_PRIMARY": target_node,
6924
      }
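    # For internally mirrored (DRBD) instances the old primary becomes the
    # new secondary after the failover.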
    if instance.disk_template in constants.DTS_INT_MIRROR:
6927
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6928
      env["NEW_SECONDARY"] = source_node
6929
    else:
6930
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6931

    
6932
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6933

    
6934
    return env
6935

    
6936
  def BuildHooksNodes(self):
6937
    """Build hooks nodes.
6938

6939
    """
6940
    instance = self._migrater.instance
6941
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6942
    return (nl, nl + [instance.primary_node])
6943

    
6944

    
6945
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])

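# The node lock set for a migration is only known once the instance's disk
# template has been looked up: externally mirrored disks may end up on an
# arbitrary (or iallocator-chosen) node, so ExpandNames above leaves
# LEVEL_NODE empty and DeclareLocks fills it in later.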
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_state == constants.ADMINST_UP:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_state == constants.ADMINST_UP:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node,
                                            (instance, None, None), False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare jobs for migrating instances
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)

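# LUNodeMigrate above does not migrate anything itself: its Exec only builds
# one OpInstanceMigrate opcode per primary instance on the node and hands
# them back via ResultWithJobs, so every instance migration runs as its own
# job.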
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration is not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the shutdown

  """

  # Constants
7298
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7299
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7300

    
7301
  def __init__(self, lu, instance_name, cleanup=False,
7302
               failover=False, fallback=False,
7303
               ignore_consistency=False,
7304
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7305
    """Initializes this class.
7306

7307
    """
7308
    Tasklet.__init__(self, lu)
7309

    
7310
    # Parameters
7311
    self.instance_name = instance_name
7312
    self.cleanup = cleanup
7313
    self.live = False # will be overridden later
7314
    self.failover = failover
7315
    self.fallback = fallback
7316
    self.ignore_consistency = ignore_consistency
7317
    self.shutdown_timeout = shutdown_timeout
7318

    
7319
  def CheckPrereq(self):
7320
    """Check prerequisites.
7321

7322
    This checks that the instance is in the cluster.
7323

7324
    """
7325
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7326
    instance = self.cfg.GetInstanceInfo(instance_name)
7327
    assert instance is not None
7328
    self.instance = instance
7329

    
7330
    if (not self.cleanup and
7331
        not instance.admin_state == constants.ADMINST_UP and
7332
        not self.failover and self.fallback):
7333
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7334
                      " switching to failover")
7335
      self.failover = True
7336

    
7337
    if instance.disk_template not in constants.DTS_MIRRORED:
7338
      if self.failover:
7339
        text = "failovers"
7340
      else:
7341
        text = "migrations"
7342
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7343
                                 " %s" % (instance.disk_template, text),
7344
                                 errors.ECODE_STATE)
7345

    
7346
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7347
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7348

    
7349
      if self.lu.op.iallocator:
7350
        self._RunAllocator()
7351
      else:
7352
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node
7355

    
7356
      # self.target_node is already populated, either directly or by the
7357
      # iallocator run
7358
      target_node = self.target_node
7359
      if self.target_node == instance.primary_node:
7360
        raise errors.OpPrereqError("Cannot migrate instance %s"
7361
                                   " to its primary (%s)" %
7362
                                   (instance.name, instance.primary_node))
7363

    
7364
      if len(self.lu.tasklets) == 1:
7365
        # It is safe to release locks only when we're the only tasklet
7366
        # in the LU
7367
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7368
                      keep=[instance.primary_node, self.target_node])
7369

    
7370
    else:
7371
      secondary_nodes = instance.secondary_nodes
7372
      if not secondary_nodes:
7373
        raise errors.ConfigurationError("No secondary node but using"
7374
                                        " %s disk template" %
7375
                                        instance.disk_template)
7376
      target_node = secondary_nodes[0]
7377
      if self.lu.op.iallocator or (self.lu.op.target_node and
7378
                                   self.lu.op.target_node != target_node):
7379
        if self.failover:
7380
          text = "failed over"
7381
        else:
7382
          text = "migrated"
7383
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7384
                                   " be %s to arbitrary nodes"
7385
                                   " (neither an iallocator nor a target"
7386
                                   " node can be passed)" %
7387
                                   (instance.disk_template, text),
7388
                                   errors.ECODE_INVAL)
7389

    
7390
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7391

    
7392
    # check memory requirements on the secondary node
7393
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
7394
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7395
                           instance.name, i_be[constants.BE_MEMORY],
7396
                           instance.hypervisor)
7397
    else:
7398
      self.lu.LogInfo("Not checking memory on the secondary node as"
7399
                      " instance will not be started")
7400

    
7401
    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7403

    
7404
    if not self.cleanup:
7405
      _CheckNodeNotDrained(self.lu, target_node)
7406
      if not self.failover:
7407
        result = self.rpc.call_instance_migratable(instance.primary_node,
7408
                                                   instance)
7409
        if result.fail_msg and self.fallback:
7410
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7411
                          " failover")
7412
          self.failover = True
7413
        else:
7414
          result.Raise("Can't migrate, please use failover",
7415
                       prereq=True, ecode=errors.ECODE_STATE)
7416

    
7417
    assert not (self.failover and self.cleanup)
7418

    
7419
    if not self.failover:
7420
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7421
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7422
                                   " parameters are accepted",
7423
                                   errors.ECODE_INVAL)
7424
      if self.lu.op.live is not None:
7425
        if self.lu.op.live:
7426
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7427
        else:
7428
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7429
        # reset the 'live' parameter to None so that repeated
7430
        # invocations of CheckPrereq do not raise an exception
7431
        self.lu.op.live = None
7432
      elif self.lu.op.mode is None:
7433
        # read the default value from the hypervisor
7434
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7435
                                                skip_globals=False)
7436
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7437

    
7438
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7439
    else:
7440
      # Failover is never live
7441
      self.live = False
7442

    
7443
  def _RunAllocator(self):
7444
    """Run the allocator based on input opcode.
7445

7446
    """
7447
    ial = IAllocator(self.cfg, self.rpc,
7448
                     mode=constants.IALLOCATOR_MODE_RELOC,
7449
                     name=self.instance_name,
7450
                     # TODO See why hail breaks with a single node below
7451
                     relocate_from=[self.instance.primary_node,
7452
                                    self.instance.primary_node],
7453
                     )
7454

    
7455
    ial.Run(self.lu.op.iallocator)
7456

    
7457
    if not ial.success:
7458
      raise errors.OpPrereqError("Can't compute nodes using"
7459
                                 " iallocator '%s': %s" %
7460
                                 (self.lu.op.iallocator, ial.info),
7461
                                 errors.ECODE_NORES)
7462
    if len(ial.result) != ial.required_nodes:
7463
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7464
                                 " of nodes (%s), required %s" %
7465
                                 (self.lu.op.iallocator, len(ial.result),
7466
                                  ial.required_nodes), errors.ECODE_FAULT)
7467
    self.target_node = ial.result[0]
7468
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7469
                 self.instance_name, self.lu.op.iallocator,
7470
                 utils.CommaJoin(ial.result))
7471

    
7472
  def _WaitUntilSync(self):
7473
    """Poll with custom rpc for disk sync.
7474

7475
    This uses our own step-based rpc call.
7476

7477
    """
7478
    self.feedback_fn("* wait until resync is done")
7479
    all_done = False
7480
    while not all_done:
7481
      all_done = True
7482
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7483
                                            self.nodes_ip,
7484
                                            self.instance.disks)
7485
      min_percent = 100
7486
      for node, nres in result.items():
7487
        nres.Raise("Cannot resync disks on node %s" % node)
7488
        node_done, node_percent = nres.payload
7489
        all_done = all_done and node_done
7490
        if node_percent is not None:
7491
          min_percent = min(min_percent, node_percent)
7492
      if not all_done:
7493
        if min_percent < 100:
7494
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7495
        time.sleep(2)
7496

    
7497
  def _EnsureSecondary(self, node):
7498
    """Demote a node to secondary.
7499

7500
    """
7501
    self.feedback_fn("* switching node %s to secondary mode" % node)
7502

    
7503
    for dev in self.instance.disks:
7504
      self.cfg.SetDiskID(dev, node)
7505

    
7506
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7507
                                          self.instance.disks)
7508
    result.Raise("Cannot change disk to secondary on node %s" % node)
7509

    
7510
  def _GoStandalone(self):
7511
    """Disconnect from the network.
7512

7513
    """
7514
    self.feedback_fn("* changing into standalone mode")
7515
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7516
                                               self.instance.disks)
7517
    for node, nres in result.items():
7518
      nres.Raise("Cannot disconnect disks node %s" % node)
7519

    
7520
  def _GoReconnect(self, multimaster):
7521
    """Reconnect to the network.
7522

7523
    """
7524
    if multimaster:
7525
      msg = "dual-master"
7526
    else:
7527
      msg = "single-master"
7528
    self.feedback_fn("* changing disks into %s mode" % msg)
7529
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7530
                                           self.instance.disks,
7531
                                           self.instance.name, multimaster)
7532
    for node, nres in result.items():
7533
      nres.Raise("Cannot change disks config on node %s" % node)
7534

    
7535
  def _ExecCleanup(self):
7536
    """Try to cleanup after a failed migration.
7537

7538
    The cleanup is done by:
7539
      - check that the instance is running only on one node
7540
        (and update the config if needed)
7541
      - change disks on its secondary node to secondary
7542
      - wait until disks are fully synchronized
7543
      - disconnect from the network
7544
      - change disks into single-master mode
7545
      - wait again until disks are fully synchronized
7546

7547
    """
7548
    instance = self.instance
7549
    target_node = self.target_node
7550
    source_node = self.source_node
7551

    
7552
    # check running on only one node
7553
    self.feedback_fn("* checking where the instance actually runs"
7554
                     " (if this hangs, the hypervisor might be in"
7555
                     " a bad state)")
7556
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7557
    for node, result in ins_l.items():
7558
      result.Raise("Can't contact node %s" % node)
7559

    
7560
    runningon_source = instance.name in ins_l[source_node].payload
7561
    runningon_target = instance.name in ins_l[target_node].payload
7562

    
7563
    if runningon_source and runningon_target:
7564
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7565
                               " or the hypervisor is confused; you will have"
7566
                               " to ensure manually that it runs only on one"
7567
                               " and restart this operation")
7568

    
7569
    if not (runningon_source or runningon_target):
7570
      raise errors.OpExecError("Instance does not seem to be running at all;"
7571
                               " in this case it's safer to repair by"
7572
                               " running 'gnt-instance stop' to ensure disk"
7573
                               " shutdown, and then restarting it")
7574

    
7575
    if runningon_target:
7576
      # the migration has actually succeeded, we need to update the config
7577
      self.feedback_fn("* instance running on secondary node (%s),"
7578
                       " updating config" % target_node)
7579
      instance.primary_node = target_node
7580
      self.cfg.Update(instance, self.feedback_fn)
7581
      demoted_node = source_node
7582
    else:
7583
      self.feedback_fn("* instance confirmed to be running on its"
7584
                       " primary node (%s)" % source_node)
7585
      demoted_node = target_node
7586

    
7587
    if instance.disk_template in constants.DTS_INT_MIRROR:
7588
      self._EnsureSecondary(demoted_node)
7589
      try:
7590
        self._WaitUntilSync()
7591
      except errors.OpExecError:
7592
        # we ignore here errors, since if the device is standalone, it
7593
        # won't be able to sync
7594
        pass
7595
      self._GoStandalone()
7596
      self._GoReconnect(False)
7597
      self._WaitUntilSync()
7598

    
7599
    self.feedback_fn("* done")
7600

    
7601
  def _RevertDiskStatus(self):
7602
    """Try to revert the disk status after a failed migration.
7603

7604
    """
7605
    target_node = self.target_node
7606
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7607
      return
7608

    
7609
    try:
7610
      self._EnsureSecondary(target_node)
7611
      self._GoStandalone()
7612
      self._GoReconnect(False)
7613
      self._WaitUntilSync()
7614
    except errors.OpExecError, err:
7615
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7616
                         " please try to recover the instance manually;"
7617
                         " error '%s'" % str(err))
7618

    
7619
  def _AbortMigration(self):
7620
    """Call the hypervisor code to abort a started migration.
7621

7622
    """
7623
    instance = self.instance
7624
    target_node = self.target_node
7625
    source_node = self.source_node
7626
    migration_info = self.migration_info
7627

    
7628
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7629
                                                                 instance,
7630
                                                                 migration_info,
7631
                                                                 False)
7632
    abort_msg = abort_result.fail_msg
7633
    if abort_msg:
7634
      logging.error("Aborting migration failed on target node %s: %s",
7635
                    target_node, abort_msg)
7636
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7638

    
7639
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7640
        instance, False, self.live)
7641
    abort_msg = abort_result.fail_msg
7642
    if abort_msg:
7643
      logging.error("Aborting migration failed on source node %s: %s",
7644
                    source_node, abort_msg)
7645

    
7646
  def _ExecMigration(self):
7647
    """Migrate an instance.
7648

7649
    The migrate is done by:
7650
      - change the disks into dual-master mode
7651
      - wait until disks are fully synchronized again
7652
      - migrate the instance
7653
      - change disks on the new secondary node (the old primary) to secondary
7654
      - wait until disks are fully synchronized
7655
      - change disks into single-master mode
7656

7657
    """
7658
    instance = self.instance
7659
    target_node = self.target_node
7660
    source_node = self.source_node
7661

    
7662
    # Check for hypervisor version mismatch and warn the user.
7663
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7664
                                       None, self.instance.hypervisor)
7665
    src_info = nodeinfo[source_node]
7666
    dst_info = nodeinfo[target_node]
7667

    
7668
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7669
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7670
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7671
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7672
      if src_version != dst_version:
7673
        self.feedback_fn("* warning: hypervisor version mismatch between"
7674
                         " source (%s) and target (%s) node" %
7675
                         (src_version, dst_version))
7676

    
7677
    self.feedback_fn("* checking disk consistency between source and target")
7678
    for dev in instance.disks:
7679
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7680
        raise errors.OpExecError("Disk %s is degraded or not fully"
7681
                                 " synchronized on target node,"
7682
                                 " aborting migration" % dev.iv_name)
7683

    
7684
    # First get the migration information from the remote node
7685
    result = self.rpc.call_migration_info(source_node, instance)
7686
    msg = result.fail_msg
7687
    if msg:
7688
      log_err = ("Failed fetching source migration information from %s: %s" %
7689
                 (source_node, msg))
7690
      logging.error(log_err)
7691
      raise errors.OpExecError(log_err)
7692

    
7693
    self.migration_info = migration_info = result.payload
7694

    
7695
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7696
      # Then switch the disks to master/master mode
7697
      self._EnsureSecondary(target_node)
7698
      self._GoStandalone()
7699
      self._GoReconnect(True)
7700
      self._WaitUntilSync()
7701

    
7702
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7703
    result = self.rpc.call_accept_instance(target_node,
7704
                                           instance,
7705
                                           migration_info,
7706
                                           self.nodes_ip[target_node])
7707

    
7708
    msg = result.fail_msg
7709
    if msg:
7710
      logging.error("Instance pre-migration failed, trying to revert"
7711
                    " disk status: %s", msg)
7712
      self.feedback_fn("Pre-migration failed, aborting")
7713
      self._AbortMigration()
7714
      self._RevertDiskStatus()
7715
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7716
                               (instance.name, msg))
7717

    
7718
    self.feedback_fn("* migrating instance to %s" % target_node)
7719
    result = self.rpc.call_instance_migrate(source_node, instance,
7720
                                            self.nodes_ip[target_node],
7721
                                            self.live)
7722
    msg = result.fail_msg
7723
    if msg:
7724
      logging.error("Instance migration failed, trying to revert"
7725
                    " disk status: %s", msg)
7726
      self.feedback_fn("Migration failed, aborting")
7727
      self._AbortMigration()
7728
      self._RevertDiskStatus()
7729
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7730
                               (instance.name, msg))
7731

    
7732
    self.feedback_fn("* starting memory transfer")
7733
    last_feedback = time.time()
7734
    while True:
7735
      result = self.rpc.call_instance_get_migration_status(source_node,
7736
                                                           instance)
7737
      msg = result.fail_msg
7738
      ms = result.payload   # MigrationStatus instance
7739
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7740
        logging.error("Instance migration failed, trying to revert"
7741
                      " disk status: %s", msg)
7742
        self.feedback_fn("Migration failed, aborting")
7743
        self._AbortMigration()
7744
        self._RevertDiskStatus()
7745
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7746
                                 (instance.name, msg))
7747

    
7748
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7749
        self.feedback_fn("* memory transfer complete")
7750
        break
7751

    
7752
      if (utils.TimeoutExpired(last_feedback,
7753
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7754
          ms.transferred_ram is not None):
7755
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7756
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7757
        last_feedback = time.time()
7758

    
7759
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7760

    
7761
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7762
                                                           instance,
7763
                                                           True,
7764
                                                           self.live)
7765
    msg = result.fail_msg
7766
    if msg:
7767
      logging.error("Instance migration succeeded, but finalization failed"
7768
                    " on the source node: %s", msg)
7769
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7770
                               msg)
7771

    
7772
    instance.primary_node = target_node
7773

    
7774
    # distribute new instance config to the other nodes
7775
    self.cfg.Update(instance, self.feedback_fn)
7776

    
7777
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7778
                                                           instance,
7779
                                                           migration_info,
7780
                                                           True)
7781
    msg = result.fail_msg
7782
    if msg:
7783
      logging.error("Instance migration succeeded, but finalization failed"
7784
                    " on the target node: %s", msg)
7785
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7786
                               msg)
7787

    
7788
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7789
      self._EnsureSecondary(source_node)
7790
      self._WaitUntilSync()
7791
      self._GoStandalone()
7792
      self._GoReconnect(False)
7793
      self._WaitUntilSync()
7794

    
7795
    self.feedback_fn("* done")
7796

    
7797
  def _ExecFailover(self):
7798
    """Failover an instance.
7799

7800
    The failover is done by shutting it down on its present node and
7801
    starting it on the secondary.
7802

7803
    """
7804
    instance = self.instance
7805
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7806

    
7807
    source_node = instance.primary_node
7808
    target_node = self.target_node
7809

    
7810
    if instance.admin_state == constants.ADMINST_UP:
7811
      self.feedback_fn("* checking disk consistency between source and target")
7812
      for dev in instance.disks:
7813
        # for drbd, these are drbd over lvm
7814
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7815
          if primary_node.offline:
7816
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7817
                             " target node %s" %
7818
                             (primary_node.name, dev.iv_name, target_node))
7819
          elif not self.ignore_consistency:
7820
            raise errors.OpExecError("Disk %s is degraded on target node,"
7821
                                     " aborting failover" % dev.iv_name)
7822
    else:
7823
      self.feedback_fn("* not checking disk consistency as instance is not"
7824
                       " running")
7825

    
7826
    self.feedback_fn("* shutting down instance on source node")
7827
    logging.info("Shutting down instance %s on node %s",
7828
                 instance.name, source_node)
7829

    
7830
    result = self.rpc.call_instance_shutdown(source_node, instance,
7831
                                             self.shutdown_timeout)
7832
    msg = result.fail_msg
7833
    if msg:
7834
      if self.ignore_consistency or primary_node.offline:
7835
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7836
                           " proceeding anyway; please make sure node"
7837
                           " %s is down; error details: %s",
7838
                           instance.name, source_node, source_node, msg)
7839
      else:
7840
        raise errors.OpExecError("Could not shutdown instance %s on"
7841
                                 " node %s: %s" %
7842
                                 (instance.name, source_node, msg))
7843

    
7844
    self.feedback_fn("* deactivating the instance's disks on source node")
7845
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7846
      raise errors.OpExecError("Can't shut down the instance's disks")
7847

    
7848
    instance.primary_node = target_node
7849
    # distribute new instance config to the other nodes
7850
    self.cfg.Update(instance, self.feedback_fn)
7851

    
7852
    # Only start the instance if it's marked as up
7853
    if instance.admin_state == constants.ADMINST_UP:
7854
      self.feedback_fn("* activating the instance's disks on target node %s" %
7855
                       target_node)
7856
      logging.info("Starting instance %s on node %s",
7857
                   instance.name, target_node)
7858

    
7859
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7860
                                           ignore_secondaries=True)
7861
      if not disks_ok:
7862
        _ShutdownInstanceDisks(self.lu, instance)
7863
        raise errors.OpExecError("Can't activate the instance's disks")
7864

    
7865
      self.feedback_fn("* starting the instance on the target node %s" %
7866
                       target_node)
7867
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7868
                                            False)
7869
      msg = result.fail_msg
7870
      if msg:
7871
        _ShutdownInstanceDisks(self.lu, instance)
7872
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7873
                                 (instance.name, target_node, msg))
7874

    
7875
  def Exec(self, feedback_fn):
7876
    """Perform the migration.
7877

7878
    """
7879
    self.feedback_fn = feedback_fn
7880
    self.source_node = self.instance.primary_node
7881

    
7882
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7883
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7884
      self.target_node = self.instance.secondary_nodes[0]
7885
      # Otherwise self.target_node has been populated either
7886
      # directly, or through an iallocator.
7887

    
7888
    self.all_nodes = [self.source_node, self.target_node]
7889
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7890
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7891

    
7892
    if self.failover:
7893
      feedback_fn("Failover instance %s" % self.instance.name)
7894
      self._ExecFailover()
7895
    else:
7896
      feedback_fn("Migrating instance %s" % self.instance.name)
7897

    
7898
      if self.cleanup:
7899
        return self._ExecCleanup()
7900
      else:
7901
        return self._ExecMigration()
7902

    
7903

    
7904
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)

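# _CreateBlockDev walks the disk tree depth-first: for a DRBD8 disk it first
# creates the two LV children (data and metadata) and only then the DRBD
# device itself, with force_create switched on once a device reports
# CreateOnSecondary().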
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload

def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev

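# The DRBD8 branch of _GenerateDiskTemplate below pairs each disk with a
# "<unique-id>.diskN_data" and a "<unique-id>.diskN_meta" LV name and with
# two DRBD minors per disk (one for the primary and one for the secondary
# node), which is what _GenerateDRBD8Branch above expects.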
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks

def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time

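# _CalcEta is a simple linear extrapolation; for example, 1024 MiB written in
# 300 seconds out of 4096 MiB gives an average of roughly 0.29 s/MiB and
# therefore an ETA of about 900 seconds for the remaining 3072 MiB.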
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)

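# _WipeDisks above writes each disk in chunks of MIN_WIPE_CHUNK_PERCENT
# percent of the disk size, capped at MAX_WIPE_CHUNK, while the device sync
# is paused; the try/finally makes sure the sync is resumed even if wiping
# fails part-way through.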
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)

def _RemoveDisks(lu, instance, target_node=None):
8251
  """Remove all disks for an instance.
8252

8253
  This abstracts away some work from `AddInstance()` and
8254
  `RemoveInstance()`. Note that in case some of the devices couldn't
8255
  be removed, the removal will continue with the other ones (compare
8256
  with `_CreateDisks()`).
8257

8258
  @type lu: L{LogicalUnit}
8259
  @param lu: the logical unit on whose behalf we execute
8260
  @type instance: L{objects.Instance}
8261
  @param instance: the instance whose disks we should remove
8262
  @type target_node: string
8263
  @param target_node: used to override the node on which to remove the disks
8264
  @rtype: boolean
8265
  @return: the success of the removal
8266

8267
  """
8268
  logging.info("Removing block devices for instance %s", instance.name)
8269

    
8270
  all_result = True
8271
  for device in instance.disks:
8272
    if target_node:
8273
      edata = [(target_node, device)]
8274
    else:
8275
      edata = device.ComputeNodeTree(instance.primary_node)
8276
    for node, disk in edata:
8277
      lu.cfg.SetDiskID(disk, node)
8278
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8279
      if msg:
8280
        lu.LogWarning("Could not remove block device %s on node %s,"
8281
                      " continuing anyway: %s", device.iv_name, node, msg)
8282
        all_result = False
8283

    
8284
  if instance.disk_template == constants.DT_FILE:
8285
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8286
    if target_node:
8287
      tgt = target_node
8288
    else:
8289
      tgt = instance.primary_node
8290
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8291
    if result.fail_msg:
8292
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8293
                    file_storage_dir, instance.primary_node, result.fail_msg)
8294
      all_result = False
8295

    
8296
  return all_result
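
# Usage note (comment added for clarity, not part of the original source):
# LUInstanceCreate.Exec below uses these two helpers as a pair: it calls
# _CreateDisks(self, iobj) and, if that raises errors.OpExecError, reverts
# with _RemoveDisks(self, iobj) before re-raising.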


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vg = disk[constants.IDISK_VG]
      vgs[vg] = vgs.get(vg, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
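
# Illustrative example (comment only; disk sizes and VG name are assumed):
# for two disks of 1024 MiB and 2048 MiB in volume group "xenvg",
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) accumulates per VG and
# returns {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}, since
# each disk is charged DRBD_META_SIZE for its DRBD metadata volume.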


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
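
# Illustrative example (comment only; sizes are assumed): for the same two
# disks of 1024 MiB and 2048 MiB, _ComputeDiskSize(constants.DT_DRBD8, disks)
# returns (1024 + 128) + (2048 + 128) == 3328 MiB, while constants.DT_PLAIN
# would return 1024 + 2048 == 3072 MiB.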


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]
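
# Note (comment added for clarity): despite its name, the vm_nodes frozenset
# in _FilterVmNodes above holds the *non*-vm_capable nodes, so the list
# comprehension keeps exactly the vm-capable ones, e.g. with
# nodenames == ["node1", "node2"] and "node2" flagged as not vm_capable the
# helper returns ["node1"].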


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)
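
# Usage note (comment added for clarity): instance creation below calls this
# as _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full),
# i.e. the OS must be present on the instance's nodes and the merged OS
# parameters are validated remotely during CheckPrereq, before any disks
# are created in Exec.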


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
8438
    """Check arguments.
8439

8440
    """
8441
    # do not require name_check to ease forward/backward compatibility
8442
    # for tools
8443
    if self.op.no_install and self.op.start:
8444
      self.LogInfo("No-installation mode selected, disabling startup")
8445
      self.op.start = False
8446
    # validate/normalize the instance name
8447
    self.op.instance_name = \
8448
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8449

    
8450
    if self.op.ip_check and not self.op.name_check:
8451
      # TODO: make the ip check more flexible and not depend on the name check
8452
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8453
                                 " check", errors.ECODE_INVAL)
8454

    
8455
    # check nics' parameter names
8456
    for nic in self.op.nics:
8457
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8458

    
8459
    # check disks. parameter names and consistent adopt/no-adopt strategy
8460
    has_adopt = has_no_adopt = False
8461
    for disk in self.op.disks:
8462
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8463
      if constants.IDISK_ADOPT in disk:
8464
        has_adopt = True
8465
      else:
8466
        has_no_adopt = True
8467
    if has_adopt and has_no_adopt:
8468
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8469
                                 errors.ECODE_INVAL)
8470
    if has_adopt:
8471
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8472
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8473
                                   " '%s' disk template" %
8474
                                   self.op.disk_template,
8475
                                   errors.ECODE_INVAL)
8476
      if self.op.iallocator is not None:
8477
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8478
                                   " iallocator script", errors.ECODE_INVAL)
8479
      if self.op.mode == constants.INSTANCE_IMPORT:
8480
        raise errors.OpPrereqError("Disk adoption not allowed for"
8481
                                   " instance import", errors.ECODE_INVAL)
8482
    else:
8483
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8484
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8485
                                   " but no 'adopt' parameter given" %
8486
                                   self.op.disk_template,
8487
                                   errors.ECODE_INVAL)
8488

    
8489
    self.adopt_disks = has_adopt
8490

    
8491
    # instance name verification
8492
    if self.op.name_check:
8493
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8494
      self.op.instance_name = self.hostname1.name
8495
      # used in CheckPrereq for ip ping check
8496
      self.check_ip = self.hostname1.ip
8497
    else:
8498
      self.check_ip = None
8499

    
8500
    # file storage checks
8501
    if (self.op.file_driver and
8502
        not self.op.file_driver in constants.FILE_DRIVER):
8503
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8504
                                 self.op.file_driver, errors.ECODE_INVAL)
8505

    
8506
    if self.op.disk_template == constants.DT_FILE:
8507
      opcodes.RequireFileStorage()
8508
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8509
      opcodes.RequireSharedFileStorage()
8510

    
8511
    ### Node/iallocator related checks
8512
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8513

    
8514
    if self.op.pnode is not None:
8515
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8516
        if self.op.snode is None:
8517
          raise errors.OpPrereqError("The networked disk templates need"
8518
                                     " a mirror node", errors.ECODE_INVAL)
8519
      elif self.op.snode:
8520
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8521
                        " template")
8522
        self.op.snode = None
8523

    
8524
    self._cds = _GetClusterDomainSecret()
8525

    
8526
    if self.op.mode == constants.INSTANCE_IMPORT:
8527
      # On import force_variant must be True, because if we forced it at
8528
      # initial install, our only chance when importing it back is that it
8529
      # works again!
8530
      self.op.force_variant = True
8531

    
8532
      if self.op.no_install:
8533
        self.LogInfo("No-installation mode has no effect during import")
8534

    
8535
    elif self.op.mode == constants.INSTANCE_CREATE:
8536
      if self.op.os_type is None:
8537
        raise errors.OpPrereqError("No guest OS specified",
8538
                                   errors.ECODE_INVAL)
8539
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8540
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8541
                                   " installation" % self.op.os_type,
8542
                                   errors.ECODE_STATE)
8543
      if self.op.disk_template is None:
8544
        raise errors.OpPrereqError("No disk template specified",
8545
                                   errors.ECODE_INVAL)
8546

    
8547
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8548
      # Check handshake to ensure both clusters have the same domain secret
8549
      src_handshake = self.op.source_handshake
8550
      if not src_handshake:
8551
        raise errors.OpPrereqError("Missing source handshake",
8552
                                   errors.ECODE_INVAL)
8553

    
8554
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8555
                                                           src_handshake)
8556
      if errmsg:
8557
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8558
                                   errors.ECODE_INVAL)
8559

    
8560
      # Load and check source CA
8561
      self.source_x509_ca_pem = self.op.source_x509_ca
8562
      if not self.source_x509_ca_pem:
8563
        raise errors.OpPrereqError("Missing source X509 CA",
8564
                                   errors.ECODE_INVAL)
8565

    
8566
      try:
8567
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8568
                                                    self._cds)
8569
      except OpenSSL.crypto.Error, err:
8570
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8571
                                   (err, ), errors.ECODE_INVAL)
8572

    
8573
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8574
      if errcode is not None:
8575
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8576
                                   errors.ECODE_INVAL)
8577

    
8578
      self.source_x509_ca = cert
8579

    
8580
      src_instance_name = self.op.source_instance_name
8581
      if not src_instance_name:
8582
        raise errors.OpPrereqError("Missing source instance name",
8583
                                   errors.ECODE_INVAL)
8584

    
8585
      self.source_instance_name = \
8586
          netutils.GetHostname(name=src_instance_name).name
8587

    
8588
    else:
8589
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8590
                                 self.op.mode, errors.ECODE_INVAL)
8591

    
8592
  def ExpandNames(self):
8593
    """ExpandNames for CreateInstance.
8594

8595
    Figure out the right locks for instance creation.
8596

8597
    """
8598
    self.needed_locks = {}
8599

    
8600
    instance_name = self.op.instance_name
8601
    # this is just a preventive check, but someone might still add this
8602
    # instance in the meantime, and creation will fail at lock-add time
8603
    if instance_name in self.cfg.GetInstanceList():
8604
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8605
                                 instance_name, errors.ECODE_EXISTS)
8606

    
8607
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8608

    
8609
    if self.op.iallocator:
8610
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8611
      # specifying a group on instance creation and then selecting nodes from
8612
      # that group
8613
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8614
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8615
    else:
8616
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8617
      nodelist = [self.op.pnode]
8618
      if self.op.snode is not None:
8619
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8620
        nodelist.append(self.op.snode)
8621
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8622
      # Lock resources of instance's primary and secondary nodes (copy to
8623
      # prevent accidental modification)
8624
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8625

    
8626
    # in case of import lock the source node too
8627
    if self.op.mode == constants.INSTANCE_IMPORT:
8628
      src_node = self.op.src_node
8629
      src_path = self.op.src_path
8630

    
8631
      if src_path is None:
8632
        self.op.src_path = src_path = self.op.instance_name
8633

    
8634
      if src_node is None:
8635
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8636
        self.op.src_node = None
8637
        if os.path.isabs(src_path):
8638
          raise errors.OpPrereqError("Importing an instance from a path"
8639
                                     " requires a source node option",
8640
                                     errors.ECODE_INVAL)
8641
      else:
8642
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8643
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8644
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8645
        if not os.path.isabs(src_path):
8646
          self.op.src_path = src_path = \
8647
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8648

    
8649
  def _RunAllocator(self):
8650
    """Run the allocator based on input opcode.
8651

8652
    """
8653
    nics = [n.ToDict() for n in self.nics]
8654
    ial = IAllocator(self.cfg, self.rpc,
8655
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8656
                     name=self.op.instance_name,
8657
                     disk_template=self.op.disk_template,
8658
                     tags=self.op.tags,
8659
                     os=self.op.os_type,
8660
                     vcpus=self.be_full[constants.BE_VCPUS],
8661
                     memory=self.be_full[constants.BE_MEMORY],
8662
                     disks=self.disks,
8663
                     nics=nics,
8664
                     hypervisor=self.op.hypervisor,
8665
                     )
8666

    
8667
    ial.Run(self.op.iallocator)
8668

    
8669
    if not ial.success:
8670
      raise errors.OpPrereqError("Can't compute nodes using"
8671
                                 " iallocator '%s': %s" %
8672
                                 (self.op.iallocator, ial.info),
8673
                                 errors.ECODE_NORES)
8674
    if len(ial.result) != ial.required_nodes:
8675
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8676
                                 " of nodes (%s), required %s" %
8677
                                 (self.op.iallocator, len(ial.result),
8678
                                  ial.required_nodes), errors.ECODE_FAULT)
8679
    self.op.pnode = ial.result[0]
8680
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8681
                 self.op.instance_name, self.op.iallocator,
8682
                 utils.CommaJoin(ial.result))
8683
    if ial.required_nodes == 2:
8684
      self.op.snode = ial.result[1]
8685

    
8686
  def BuildHooksEnv(self):
8687
    """Build hooks env.
8688

8689
    This runs on master, primary and secondary nodes of the instance.
8690

8691
    """
8692
    env = {
8693
      "ADD_MODE": self.op.mode,
8694
      }
8695
    if self.op.mode == constants.INSTANCE_IMPORT:
8696
      env["SRC_NODE"] = self.op.src_node
8697
      env["SRC_PATH"] = self.op.src_path
8698
      env["SRC_IMAGES"] = self.src_images
8699

    
8700
    env.update(_BuildInstanceHookEnv(
8701
      name=self.op.instance_name,
8702
      primary_node=self.op.pnode,
8703
      secondary_nodes=self.secondaries,
8704
      status=self.op.start,
8705
      os_type=self.op.os_type,
8706
      memory=self.be_full[constants.BE_MEMORY],
8707
      vcpus=self.be_full[constants.BE_VCPUS],
8708
      nics=_NICListToTuple(self, self.nics),
8709
      disk_template=self.op.disk_template,
8710
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8711
             for d in self.disks],
8712
      bep=self.be_full,
8713
      hvp=self.hv_full,
8714
      hypervisor_name=self.op.hypervisor,
8715
      tags=self.op.tags,
8716
    ))
8717

    
8718
    return env
8719

    
8720
  def BuildHooksNodes(self):
8721
    """Build hooks nodes.
8722

8723
    """
8724
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8725
    return nl, nl
8726

    
8727
  def _ReadExportInfo(self):
8728
    """Reads the export information from disk.
8729

8730
    It will override the opcode source node and path with the actual
8731
    information, if these two were not specified before.
8732

8733
    @return: the export information
8734

8735
    """
8736
    assert self.op.mode == constants.INSTANCE_IMPORT
8737

    
8738
    src_node = self.op.src_node
8739
    src_path = self.op.src_path
8740

    
8741
    if src_node is None:
8742
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8743
      exp_list = self.rpc.call_export_list(locked_nodes)
8744
      found = False
8745
      for node in exp_list:
8746
        if exp_list[node].fail_msg:
8747
          continue
8748
        if src_path in exp_list[node].payload:
8749
          found = True
8750
          self.op.src_node = src_node = node
8751
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8752
                                                       src_path)
8753
          break
8754
      if not found:
8755
        raise errors.OpPrereqError("No export found for relative path %s" %
8756
                                    src_path, errors.ECODE_INVAL)
8757

    
8758
    _CheckNodeOnline(self, src_node)
8759
    result = self.rpc.call_export_info(src_node, src_path)
8760
    result.Raise("No export or invalid export found in dir %s" % src_path)
8761

    
8762
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8763
    if not export_info.has_section(constants.INISECT_EXP):
8764
      raise errors.ProgrammerError("Corrupted export config",
8765
                                   errors.ECODE_ENVIRON)
8766

    
8767
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8768
    if (int(ei_version) != constants.EXPORT_VERSION):
8769
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8770
                                 (ei_version, constants.EXPORT_VERSION),
8771
                                 errors.ECODE_ENVIRON)
8772
    return export_info
8773

    
8774
  def _ReadExportParams(self, einfo):
8775
    """Use export parameters as defaults.
8776

8777
    In case the opcode doesn't specify (as in override) some instance
8778
    parameters, then try to use them from the export information, if
8779
    that declares them.
8780

8781
    """
8782
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8783

    
8784
    if self.op.disk_template is None:
8785
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8786
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8787
                                          "disk_template")
8788
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8789
          raise errors.OpPrereqError("Disk template specified in configuration"
8790
                                     " file is not one of the allowed values:"
8791
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8792
      else:
8793
        raise errors.OpPrereqError("No disk template specified and the export"
8794
                                   " is missing the disk_template information",
8795
                                   errors.ECODE_INVAL)
8796

    
8797
    if not self.op.disks:
8798
      disks = []
8799
      # TODO: import the disk iv_name too
8800
      for idx in range(constants.MAX_DISKS):
8801
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8802
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8803
          disks.append({constants.IDISK_SIZE: disk_sz})
8804
      self.op.disks = disks
8805
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8806
        raise errors.OpPrereqError("No disk info specified and the export"
8807
                                   " is missing the disk information",
8808
                                   errors.ECODE_INVAL)
8809

    
8810
    if not self.op.nics:
8811
      nics = []
8812
      for idx in range(constants.MAX_NICS):
8813
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8814
          ndict = {}
8815
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8816
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8817
            ndict[name] = v
8818
          nics.append(ndict)
8819
        else:
8820
          break
8821
      self.op.nics = nics
8822

    
8823
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8824
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8825

    
8826
    if (self.op.hypervisor is None and
8827
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8828
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8829

    
8830
    if einfo.has_section(constants.INISECT_HYP):
8831
      # use the export parameters but do not override the ones
8832
      # specified by the user
8833
      for name, value in einfo.items(constants.INISECT_HYP):
8834
        if name not in self.op.hvparams:
8835
          self.op.hvparams[name] = value
8836

    
8837
    if einfo.has_section(constants.INISECT_BEP):
8838
      # use the parameters, without overriding
8839
      for name, value in einfo.items(constants.INISECT_BEP):
8840
        if name not in self.op.beparams:
8841
          self.op.beparams[name] = value
8842
    else:
8843
      # try to read the parameters old style, from the main section
8844
      for name in constants.BES_PARAMETERS:
8845
        if (name not in self.op.beparams and
8846
            einfo.has_option(constants.INISECT_INS, name)):
8847
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8848

    
8849
    if einfo.has_section(constants.INISECT_OSP):
8850
      # use the parameters, without overriding
8851
      for name, value in einfo.items(constants.INISECT_OSP):
8852
        if name not in self.op.osparams:
8853
          self.op.osparams[name] = value
8854

    
8855
  def _RevertToDefaults(self, cluster):
8856
    """Revert the instance parameters to the default values.
8857

8858
    """
8859
    # hvparams
8860
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8861
    for name in self.op.hvparams.keys():
8862
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8863
        del self.op.hvparams[name]
8864
    # beparams
8865
    be_defs = cluster.SimpleFillBE({})
8866
    for name in self.op.beparams.keys():
8867
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8868
        del self.op.beparams[name]
8869
    # nic params
8870
    nic_defs = cluster.SimpleFillNIC({})
8871
    for nic in self.op.nics:
8872
      for name in constants.NICS_PARAMETERS:
8873
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8874
          del nic[name]
8875
    # osparams
8876
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8877
    for name in self.op.osparams.keys():
8878
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8879
        del self.op.osparams[name]
8880

    
8881
  def _CalculateFileStorageDir(self):
8882
    """Calculate final instance file storage dir.
8883

8884
    """
8885
    # file storage dir calculation/check
8886
    self.instance_file_storage_dir = None
8887
    if self.op.disk_template in constants.DTS_FILEBASED:
8888
      # build the full file storage dir path
8889
      joinargs = []
8890

    
8891
      if self.op.disk_template == constants.DT_SHARED_FILE:
8892
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8893
      else:
8894
        get_fsd_fn = self.cfg.GetFileStorageDir
8895

    
8896
      cfg_storagedir = get_fsd_fn()
8897
      if not cfg_storagedir:
8898
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8899
      joinargs.append(cfg_storagedir)
8900

    
8901
      if self.op.file_storage_dir is not None:
8902
        joinargs.append(self.op.file_storage_dir)
8903

    
8904
      joinargs.append(self.op.instance_name)
8905

    
8906
      # pylint: disable=W0142
8907
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8908

    
8909
  def CheckPrereq(self):
8910
    """Check prerequisites.
8911

8912
    """
8913
    self._CalculateFileStorageDir()
8914

    
8915
    if self.op.mode == constants.INSTANCE_IMPORT:
8916
      export_info = self._ReadExportInfo()
8917
      self._ReadExportParams(export_info)
8918

    
8919
    if (not self.cfg.GetVGName() and
8920
        self.op.disk_template not in constants.DTS_NOT_LVM):
8921
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8922
                                 " instances", errors.ECODE_STATE)
8923

    
8924
    if (self.op.hypervisor is None or
8925
        self.op.hypervisor == constants.VALUE_AUTO):
8926
      self.op.hypervisor = self.cfg.GetHypervisorType()
8927

    
8928
    cluster = self.cfg.GetClusterInfo()
8929
    enabled_hvs = cluster.enabled_hypervisors
8930
    if self.op.hypervisor not in enabled_hvs:
8931
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8932
                                 " cluster (%s)" % (self.op.hypervisor,
8933
                                  ",".join(enabled_hvs)),
8934
                                 errors.ECODE_STATE)
8935

    
8936
    # Check tag validity
8937
    for tag in self.op.tags:
8938
      objects.TaggableObject.ValidateTag(tag)
8939

    
8940
    # check hypervisor parameter syntax (locally)
8941
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8942
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8943
                                      self.op.hvparams)
8944
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8945
    hv_type.CheckParameterSyntax(filled_hvp)
8946
    self.hv_full = filled_hvp
8947
    # check that we don't specify global parameters on an instance
8948
    _CheckGlobalHvParams(self.op.hvparams)
8949

    
8950
    # fill and remember the beparams dict
8951
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8952
    for param, value in self.op.beparams.iteritems():
8953
      if value == constants.VALUE_AUTO:
8954
        self.op.beparams[param] = default_beparams[param]
8955
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8956
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8957

    
8958
    # build os parameters
8959
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8960

    
8961
    # now that hvp/bep are in final format, let's reset to defaults,
8962
    # if told to do so
8963
    if self.op.identify_defaults:
8964
      self._RevertToDefaults(cluster)
8965

    
8966
    # NIC buildup
8967
    self.nics = []
8968
    for idx, nic in enumerate(self.op.nics):
8969
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8970
      nic_mode = nic_mode_req
8971
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8972
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8973

    
8974
      # in routed mode, for the first nic, the default ip is 'auto'
8975
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8976
        default_ip_mode = constants.VALUE_AUTO
8977
      else:
8978
        default_ip_mode = constants.VALUE_NONE
8979

    
8980
      # ip validity checks
8981
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8982
      if ip is None or ip.lower() == constants.VALUE_NONE:
8983
        nic_ip = None
8984
      elif ip.lower() == constants.VALUE_AUTO:
8985
        if not self.op.name_check:
8986
          raise errors.OpPrereqError("IP address set to auto but name checks"
8987
                                     " have been skipped",
8988
                                     errors.ECODE_INVAL)
8989
        nic_ip = self.hostname1.ip
8990
      else:
8991
        if not netutils.IPAddress.IsValid(ip):
8992
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8993
                                     errors.ECODE_INVAL)
8994
        nic_ip = ip
8995

    
8996
      # TODO: check the ip address for uniqueness
8997
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8998
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8999
                                   errors.ECODE_INVAL)
9000

    
9001
      # MAC address verification
9002
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9003
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9004
        mac = utils.NormalizeAndValidateMac(mac)
9005

    
9006
        try:
9007
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9008
        except errors.ReservationError:
9009
          raise errors.OpPrereqError("MAC address %s already in use"
9010
                                     " in cluster" % mac,
9011
                                     errors.ECODE_NOTUNIQUE)
9012

    
9013
      #  Build nic parameters
9014
      link = nic.get(constants.INIC_LINK, None)
9015
      if link == constants.VALUE_AUTO:
9016
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9017
      nicparams = {}
9018
      if nic_mode_req:
9019
        nicparams[constants.NIC_MODE] = nic_mode
9020
      if link:
9021
        nicparams[constants.NIC_LINK] = link
9022

    
9023
      check_params = cluster.SimpleFillNIC(nicparams)
9024
      objects.NIC.CheckParameterSyntax(check_params)
9025
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9026

    
9027
    # disk checks/pre-build
9028
    default_vg = self.cfg.GetVGName()
9029
    self.disks = []
9030
    for disk in self.op.disks:
9031
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9032
      if mode not in constants.DISK_ACCESS_SET:
9033
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9034
                                   mode, errors.ECODE_INVAL)
9035
      size = disk.get(constants.IDISK_SIZE, None)
9036
      if size is None:
9037
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9038
      try:
9039
        size = int(size)
9040
      except (TypeError, ValueError):
9041
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9042
                                   errors.ECODE_INVAL)
9043

    
9044
      data_vg = disk.get(constants.IDISK_VG, default_vg)
9045
      new_disk = {
9046
        constants.IDISK_SIZE: size,
9047
        constants.IDISK_MODE: mode,
9048
        constants.IDISK_VG: data_vg,
9049
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9050
        }
9051
      if constants.IDISK_ADOPT in disk:
9052
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9053
      self.disks.append(new_disk)
9054

    
9055
    if self.op.mode == constants.INSTANCE_IMPORT:
9056
      disk_images = []
9057
      for idx in range(len(self.disks)):
9058
        option = "disk%d_dump" % idx
9059
        if export_info.has_option(constants.INISECT_INS, option):
9060
          # FIXME: are the old os-es, disk sizes, etc. useful?
9061
          export_name = export_info.get(constants.INISECT_INS, option)
9062
          image = utils.PathJoin(self.op.src_path, export_name)
9063
          disk_images.append(image)
9064
        else:
9065
          disk_images.append(False)
9066

    
9067
      self.src_images = disk_images
9068

    
9069
      old_name = export_info.get(constants.INISECT_INS, "name")
9070
      if self.op.instance_name == old_name:
9071
        for idx, nic in enumerate(self.nics):
9072
          if nic.mac == constants.VALUE_AUTO:
9073
            nic_mac_ini = "nic%d_mac" % idx
9074
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9075

    
9076
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9077

    
9078
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
9079
    if self.op.ip_check:
9080
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9081
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
9082
                                   (self.check_ip, self.op.instance_name),
9083
                                   errors.ECODE_NOTUNIQUE)
9084

    
9085
    #### mac address generation
9086
    # By generating here the mac address both the allocator and the hooks get
9087
    # the real final mac address rather than the 'auto' or 'generate' value.
9088
    # There is a race condition between the generation and the instance object
9089
    # creation, which means that we know the mac is valid now, but we're not
9090
    # sure it will be when we actually add the instance. If things go bad
9091
    # adding the instance will abort because of a duplicate mac, and the
9092
    # creation job will fail.
9093
    for nic in self.nics:
9094
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9095
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9096

    
9097
    #### allocator run
9098

    
9099
    if self.op.iallocator is not None:
9100
      self._RunAllocator()
9101

    
9102
    #### node related checks
9103

    
9104
    # check primary node
9105
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9106
    assert self.pnode is not None, \
9107
      "Cannot retrieve locked node %s" % self.op.pnode
9108
    if pnode.offline:
9109
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9110
                                 pnode.name, errors.ECODE_STATE)
9111
    if pnode.drained:
9112
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9113
                                 pnode.name, errors.ECODE_STATE)
9114
    if not pnode.vm_capable:
9115
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9116
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9117

    
9118
    self.secondaries = []
9119

    
9120
    # mirror node verification
9121
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9122
      if self.op.snode == pnode.name:
9123
        raise errors.OpPrereqError("The secondary node cannot be the"
9124
                                   " primary node", errors.ECODE_INVAL)
9125
      _CheckNodeOnline(self, self.op.snode)
9126
      _CheckNodeNotDrained(self, self.op.snode)
9127
      _CheckNodeVmCapable(self, self.op.snode)
9128
      self.secondaries.append(self.op.snode)
9129

    
9130
    nodenames = [pnode.name] + self.secondaries
9131

    
9132
    if not self.adopt_disks:
9133
      # Check lv size requirements, if not adopting
9134
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9135
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9136

    
9137
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9138
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9139
                                disk[constants.IDISK_ADOPT])
9140
                     for disk in self.disks])
9141
      if len(all_lvs) != len(self.disks):
9142
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9143
                                   errors.ECODE_INVAL)
9144
      for lv_name in all_lvs:
9145
        try:
9146
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9147
          # to ReserveLV uses the same syntax
9148
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9149
        except errors.ReservationError:
9150
          raise errors.OpPrereqError("LV named %s used by another instance" %
9151
                                     lv_name, errors.ECODE_NOTUNIQUE)
9152

    
9153
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9154
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9155

    
9156
      node_lvs = self.rpc.call_lv_list([pnode.name],
9157
                                       vg_names.payload.keys())[pnode.name]
9158
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9159
      node_lvs = node_lvs.payload
9160

    
9161
      delta = all_lvs.difference(node_lvs.keys())
9162
      if delta:
9163
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9164
                                   utils.CommaJoin(delta),
9165
                                   errors.ECODE_INVAL)
9166
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9167
      if online_lvs:
9168
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9169
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9170
                                   errors.ECODE_STATE)
9171
      # update the size of disk based on what is found
9172
      for dsk in self.disks:
9173
        dsk[constants.IDISK_SIZE] = \
9174
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9175
                                        dsk[constants.IDISK_ADOPT])][0]))
9176

    
9177
    elif self.op.disk_template == constants.DT_BLOCK:
9178
      # Normalize and de-duplicate device paths
9179
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9180
                       for disk in self.disks])
9181
      if len(all_disks) != len(self.disks):
9182
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9183
                                   errors.ECODE_INVAL)
9184
      baddisks = [d for d in all_disks
9185
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9186
      if baddisks:
9187
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9188
                                   " cannot be adopted" %
9189
                                   (", ".join(baddisks),
9190
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9191
                                   errors.ECODE_INVAL)
9192

    
9193
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9194
                                            list(all_disks))[pnode.name]
9195
      node_disks.Raise("Cannot get block device information from node %s" %
9196
                       pnode.name)
9197
      node_disks = node_disks.payload
9198
      delta = all_disks.difference(node_disks.keys())
9199
      if delta:
9200
        raise errors.OpPrereqError("Missing block device(s): %s" %
9201
                                   utils.CommaJoin(delta),
9202
                                   errors.ECODE_INVAL)
9203
      for dsk in self.disks:
9204
        dsk[constants.IDISK_SIZE] = \
9205
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9206

    
9207
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9208

    
9209
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9210
    # check OS parameters (remotely)
9211
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9212

    
9213
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9214

    
9215
    # memory check on primary node
9216
    if self.op.start:
9217
      _CheckNodeFreeMemory(self, self.pnode.name,
9218
                           "creating instance %s" % self.op.instance_name,
9219
                           self.be_full[constants.BE_MEMORY],
9220
                           self.op.hypervisor)
9221

    
9222
    self.dry_run_result = list(nodenames)
9223

    
9224
  def Exec(self, feedback_fn):
9225
    """Create and add the instance to the cluster.
9226

9227
    """
9228
    instance = self.op.instance_name
9229
    pnode_name = self.pnode.name
9230

    
9231
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9232
                self.owned_locks(locking.LEVEL_NODE)), \
9233
      "Node locks differ from node resource locks"
9234

    
9235
    ht_kind = self.op.hypervisor
9236
    if ht_kind in constants.HTS_REQ_PORT:
9237
      network_port = self.cfg.AllocatePort()
9238
    else:
9239
      network_port = None
9240

    
9241
    disks = _GenerateDiskTemplate(self,
9242
                                  self.op.disk_template,
9243
                                  instance, pnode_name,
9244
                                  self.secondaries,
9245
                                  self.disks,
9246
                                  self.instance_file_storage_dir,
9247
                                  self.op.file_driver,
9248
                                  0,
9249
                                  feedback_fn)
9250

    
9251
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9252
                            primary_node=pnode_name,
9253
                            nics=self.nics, disks=disks,
9254
                            disk_template=self.op.disk_template,
9255
                            admin_state=constants.ADMINST_DOWN,
9256
                            network_port=network_port,
9257
                            beparams=self.op.beparams,
9258
                            hvparams=self.op.hvparams,
9259
                            hypervisor=self.op.hypervisor,
9260
                            osparams=self.op.osparams,
9261
                            )
9262

    
9263
    if self.op.tags:
9264
      for tag in self.op.tags:
9265
        iobj.AddTag(tag)
9266

    
9267
    if self.adopt_disks:
9268
      if self.op.disk_template == constants.DT_PLAIN:
9269
        # rename LVs to the newly-generated names; we need to construct
9270
        # 'fake' LV disks with the old data, plus the new unique_id
9271
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9272
        rename_to = []
9273
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9274
          rename_to.append(t_dsk.logical_id)
9275
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9276
          self.cfg.SetDiskID(t_dsk, pnode_name)
9277
        result = self.rpc.call_blockdev_rename(pnode_name,
9278
                                               zip(tmp_disks, rename_to))
9279
        result.Raise("Failed to rename adoped LVs")
9280
    else:
9281
      feedback_fn("* creating instance disks...")
9282
      try:
9283
        _CreateDisks(self, iobj)
9284
      except errors.OpExecError:
9285
        self.LogWarning("Device creation failed, reverting...")
9286
        try:
9287
          _RemoveDisks(self, iobj)
9288
        finally:
9289
          self.cfg.ReleaseDRBDMinors(instance)
9290
          raise
9291

    
9292
    feedback_fn("adding instance %s to cluster config" % instance)
9293

    
9294
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9295

    
9296
    # Declare that we don't want to remove the instance lock anymore, as we've
9297
    # added the instance to the config
9298
    del self.remove_locks[locking.LEVEL_INSTANCE]
9299

    
9300
    if self.op.mode == constants.INSTANCE_IMPORT:
9301
      # Release unused nodes
9302
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9303
    else:
9304
      # Release all nodes
9305
      _ReleaseLocks(self, locking.LEVEL_NODE)
9306

    
9307
    disk_abort = False
9308
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9309
      feedback_fn("* wiping instance disks...")
9310
      try:
9311
        _WipeDisks(self, iobj)
9312
      except errors.OpExecError, err:
9313
        logging.exception("Wiping disks failed")
9314
        self.LogWarning("Wiping instance disks failed (%s)", err)
9315
        disk_abort = True
9316

    
9317
    if disk_abort:
9318
      # Something is already wrong with the disks, don't do anything else
9319
      pass
9320
    elif self.op.wait_for_sync:
9321
      disk_abort = not _WaitForSync(self, iobj)
9322
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9323
      # make sure the disks are not degraded (still sync-ing is ok)
9324
      feedback_fn("* checking mirrors status")
9325
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9326
    else:
9327
      disk_abort = False
9328

    
9329
    if disk_abort:
9330
      _RemoveDisks(self, iobj)
9331
      self.cfg.RemoveInstance(iobj.name)
9332
      # Make sure the instance lock gets removed
9333
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9334
      raise errors.OpExecError("There are some degraded disks for"
9335
                               " this instance")
9336

    
9337
    # Release all node resource locks
9338
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9339

    
9340
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9341
      if self.op.mode == constants.INSTANCE_CREATE:
9342
        if not self.op.no_install:
9343
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9344
                        not self.op.wait_for_sync)
9345
          if pause_sync:
9346
            feedback_fn("* pausing disk sync to install instance OS")
9347
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9348
                                                              iobj.disks, True)
9349
            for idx, success in enumerate(result.payload):
9350
              if not success:
9351
                logging.warn("pause-sync of instance %s for disk %d failed",
9352
                             instance, idx)
9353

    
9354
          feedback_fn("* running the instance OS create scripts...")
9355
          # FIXME: pass debug option from opcode to backend
9356
          os_add_result = \
9357
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9358
                                          self.op.debug_level)
9359
          if pause_sync:
9360
            feedback_fn("* resuming disk sync")
9361
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9362
                                                              iobj.disks, False)
9363
            for idx, success in enumerate(result.payload):
9364
              if not success:
9365
                logging.warn("resume-sync of instance %s for disk %d failed",
9366
                             instance, idx)
9367

    
9368
          os_add_result.Raise("Could not add os for instance %s"
9369
                              " on node %s" % (instance, pnode_name))
9370

    
9371
      elif self.op.mode == constants.INSTANCE_IMPORT:
9372
        feedback_fn("* running the instance OS import scripts...")
9373

    
9374
        transfers = []
9375

    
9376
        for idx, image in enumerate(self.src_images):
9377
          if not image:
9378
            continue
9379

    
9380
          # FIXME: pass debug option from opcode to backend
9381
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9382
                                             constants.IEIO_FILE, (image, ),
9383
                                             constants.IEIO_SCRIPT,
9384
                                             (iobj.disks[idx], idx),
9385
                                             None)
9386
          transfers.append(dt)
9387

    
9388
        import_result = \
9389
          masterd.instance.TransferInstanceData(self, feedback_fn,
9390
                                                self.op.src_node, pnode_name,
9391
                                                self.pnode.secondary_ip,
9392
                                                iobj, transfers)
9393
        if not compat.all(import_result):
9394
          self.LogWarning("Some disks for instance %s on node %s were not"
9395
                          " imported successfully" % (instance, pnode_name))
9396

    
9397
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9398
        feedback_fn("* preparing remote import...")
9399
        # The source cluster will stop the instance before attempting to make a
9400
        # connection. In some cases stopping an instance can take a long time,
9401
        # hence the shutdown timeout is added to the connection timeout.
9402
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9403
                           self.op.source_shutdown_timeout)
9404
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9405

    
9406
        assert iobj.primary_node == self.pnode.name
9407
        disk_results = \
9408
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9409
                                        self.source_x509_ca,
9410
                                        self._cds, timeouts)
9411
        if not compat.all(disk_results):
9412
          # TODO: Should the instance still be started, even if some disks
9413
          # failed to import (valid for local imports, too)?
9414
          self.LogWarning("Some disks for instance %s on node %s were not"
9415
                          " imported successfully" % (instance, pnode_name))
9416

    
9417
        # Run rename script on newly imported instance
9418
        assert iobj.name == instance
9419
        feedback_fn("Running rename script for %s" % instance)
9420
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9421
                                                   self.source_instance_name,
9422
                                                   self.op.debug_level)
9423
        if result.fail_msg:
9424
          self.LogWarning("Failed to run rename script for %s on node"
9425
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9426

    
9427
      else:
9428
        # also checked in the prereq part
9429
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9430
                                     % self.op.mode)
9431

    
9432
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
9433

    
9434
    if self.op.start:
9435
      iobj.admin_state = constants.ADMINST_UP
9436
      self.cfg.Update(iobj, feedback_fn)
9437
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9438
      feedback_fn("* starting instance...")
9439
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9440
                                            False)
9441
      result.Raise("Could not start instance")
9442

    
9443
    return list(iobj.all_nodes)
9444

    
9445

    
9446
class LUInstanceConsole(NoHooksLU):
9447
  """Connect to an instance's console.
9448

9449
  This is somewhat special in that it returns the command line that
9450
  you need to run on the master node in order to connect to the
9451
  console.
9452

9453
  """
9454
  REQ_BGL = False
9455

    
9456
  def ExpandNames(self):
9457
    self.share_locks = _ShareAll()
9458
    self._ExpandAndLockInstance()
9459

    
9460
  def CheckPrereq(self):
9461
    """Check prerequisites.
9462

9463
    This checks that the instance is in the cluster.
9464

9465
    """
9466
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9467
    assert self.instance is not None, \
9468
      "Cannot retrieve locked instance %s" % self.op.instance_name
9469
    _CheckNodeOnline(self, self.instance.primary_node)
9470

    
9471
  def Exec(self, feedback_fn):
9472
    """Connect to the console of an instance
9473

9474
    """
9475
    instance = self.instance
9476
    node = instance.primary_node
9477

    
9478
    node_insts = self.rpc.call_instance_list([node],
9479
                                             [instance.hypervisor])[node]
9480
    node_insts.Raise("Can't get node information from %s" % node)
9481

    
9482
    if instance.name not in node_insts.payload:
9483
      if instance.admin_state == constants.ADMINST_UP:
9484
        state = constants.INSTST_ERRORDOWN
9485
      elif instance.admin_state == constants.ADMINST_DOWN:
9486
        state = constants.INSTST_ADMINDOWN
9487
      else:
9488
        state = constants.INSTST_ADMINOFFLINE
9489
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9490
                               (instance.name, state))
9491

    
9492
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9493

    
9494
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9495

    
9496

    
9497
def _GetInstanceConsole(cluster, instance):
9498
  """Returns console information for an instance.
9499

9500
  @type cluster: L{objects.Cluster}
9501
  @type instance: L{objects.Instance}
9502
  @rtype: dict
9503

9504
  """
9505
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9506
  # beparams and hvparams are passed separately, to avoid editing the
9507
  # instance and then saving the defaults in the instance itself.
9508
  hvparams = cluster.FillHV(instance)
9509
  beparams = cluster.FillBE(instance)
9510
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9511

    
9512
  assert console.instance == instance.name
9513
  assert console.Validate()
9514

    
9515
  return console.ToDict()
9516

    
9517

    
9518
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


class TLReplaceDisks(Tasklet):
9637
  """Replaces disks for an instance.
9638

9639
  Note: Locking is not within the scope of this class.
9640

9641
  """
9642
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9643
               disks, delay_iallocator, early_release):
9644
    """Initializes this class.
9645

9646
    """
9647
    Tasklet.__init__(self, lu)
9648

    
9649
    # Parameters
9650
    self.instance_name = instance_name
9651
    self.mode = mode
9652
    self.iallocator_name = iallocator_name
9653
    self.remote_node = remote_node
9654
    self.disks = disks
9655
    self.delay_iallocator = delay_iallocator
9656
    self.early_release = early_release
9657

    
9658
    # Runtime data
9659
    self.instance = None
9660
    self.new_node = None
9661
    self.target_node = None
9662
    self.other_node = None
9663
    self.remote_node_info = None
9664
    self.node_secondary_ip = None
9665

    
9666
  @staticmethod
9667
  def CheckArguments(mode, remote_node, iallocator):
9668
    """Helper function for users of this class.
9669

9670
    """
9671
    # check for valid parameter combination
9672
    if mode == constants.REPLACE_DISK_CHG:
9673
      if remote_node is None and iallocator is None:
9674
        raise errors.OpPrereqError("When changing the secondary either an"
9675
                                   " iallocator script must be used or the"
9676
                                   " new node given", errors.ECODE_INVAL)
9677

    
9678
      if remote_node is not None and iallocator is not None:
9679
        raise errors.OpPrereqError("Give either the iallocator or the new"
9680
                                   " secondary, not both", errors.ECODE_INVAL)
9681

    
9682
    elif remote_node is not None or iallocator is not None:
9683
      # Not replacing the secondary
9684
      raise errors.OpPrereqError("The iallocator and new node options can"
9685
                                 " only be used when changing the"
9686
                                 " secondary node", errors.ECODE_INVAL)
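  # Illustrative use of the check above (a sketch; "hail" and "node2" are
  # just example iallocator/node names, not anything this module requires):
  #
  #   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")
  #   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, "node2", None)
  #   # Both of the following raise errors.OpPrereqError:
  #   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, None, None)
  #   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_PRI, "node2", None)
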
  @staticmethod
9689
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9690
    """Compute a new secondary node using an IAllocator.
9691

9692
    """
9693
    ial = IAllocator(lu.cfg, lu.rpc,
9694
                     mode=constants.IALLOCATOR_MODE_RELOC,
9695
                     name=instance_name,
9696
                     relocate_from=list(relocate_from))
9697

    
9698
    ial.Run(iallocator_name)
9699

    
9700
    if not ial.success:
9701
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9702
                                 " %s" % (iallocator_name, ial.info),
9703
                                 errors.ECODE_NORES)
9704

    
9705
    if len(ial.result) != ial.required_nodes:
9706
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9707
                                 " of nodes (%s), required %s" %
9708
                                 (iallocator_name,
9709
                                  len(ial.result), ial.required_nodes),
9710
                                 errors.ECODE_FAULT)
9711

    
9712
    remote_node_name = ial.result[0]
9713

    
9714
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9715
               instance_name, remote_node_name)
9716

    
9717
    return remote_node_name
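  # Sketch of what the relocation request above boils down to (field and
  # node names illustrative, not the exact IAllocator wire format):
  #
  #   request : mode=relocate, name="inst1.example.com",
  #             relocate_from=["node2.example.com"]
  #   answer  : ial.success == True, ial.result == ["node3.example.com"]
  #
  # ial.result[0] is then returned as the new secondary candidate.
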
  def _FindFaultyDisks(self, node_name):
9720
    """Wrapper for L{_FindFaultyInstanceDisks}.
9721

9722
    """
9723
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9724
                                    node_name, True)
9725

    
9726
  def _CheckDisksActivated(self, instance):
9727
    """Checks if the instance disks are activated.
9728

9729
    @param instance: The instance to check disks
9730
    @return: True if they are activated, False otherwise
9731

9732
    """
9733
    nodes = instance.all_nodes
9734

    
9735
    for idx, dev in enumerate(instance.disks):
9736
      for node in nodes:
9737
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9738
        self.cfg.SetDiskID(dev, node)
9739

    
9740
        result = self.rpc.call_blockdev_find(node, dev)
9741

    
9742
        if result.offline:
9743
          continue
9744
        elif result.fail_msg or not result.payload:
9745
          return False
9746

    
9747
    return True
9748

    
9749
  def CheckPrereq(self):
9750
    """Check prerequisites.
9751

9752
    This checks that the instance is in the cluster.
9753

9754
    """
9755
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9756
    assert instance is not None, \
9757
      "Cannot retrieve locked instance %s" % self.instance_name
9758

    
9759
    if instance.disk_template != constants.DT_DRBD8:
9760
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9761
                                 " instances", errors.ECODE_INVAL)
9762

    
9763
    if len(instance.secondary_nodes) != 1:
9764
      raise errors.OpPrereqError("The instance has a strange layout,"
9765
                                 " expected one secondary but found %d" %
9766
                                 len(instance.secondary_nodes),
9767
                                 errors.ECODE_FAULT)
9768

    
9769
    if not self.delay_iallocator:
9770
      self._CheckPrereq2()
9771

    
9772
  def _CheckPrereq2(self):
9773
    """Check prerequisites, second part.
9774

9775
    This function should always be part of CheckPrereq. It was separated and is
9776
    now called from Exec because during node evacuation iallocator was only
9777
    called with an unmodified cluster model, not taking planned changes into
9778
    account.
9779

9780
    """
9781
    instance = self.instance
9782
    secondary_node = instance.secondary_nodes[0]
9783

    
9784
    if self.iallocator_name is None:
9785
      remote_node = self.remote_node
9786
    else:
9787
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9788
                                       instance.name, instance.secondary_nodes)
9789

    
9790
    if remote_node is None:
9791
      self.remote_node_info = None
9792
    else:
9793
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9794
             "Remote node '%s' is not locked" % remote_node
9795

    
9796
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9797
      assert self.remote_node_info is not None, \
9798
        "Cannot retrieve locked node %s" % remote_node
9799

    
9800
    if remote_node == self.instance.primary_node:
9801
      raise errors.OpPrereqError("The specified node is the primary node of"
9802
                                 " the instance", errors.ECODE_INVAL)
9803

    
9804
    if remote_node == secondary_node:
9805
      raise errors.OpPrereqError("The specified node is already the"
9806
                                 " secondary node of the instance",
9807
                                 errors.ECODE_INVAL)
9808

    
9809
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9810
                                    constants.REPLACE_DISK_CHG):
9811
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9812
                                 errors.ECODE_INVAL)
9813

    
9814
    if self.mode == constants.REPLACE_DISK_AUTO:
9815
      if not self._CheckDisksActivated(instance):
9816
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9817
                                   " first" % self.instance_name,
9818
                                   errors.ECODE_STATE)
9819
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9820
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9821

    
9822
      if faulty_primary and faulty_secondary:
9823
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9824
                                   " one node and can not be repaired"
9825
                                   " automatically" % self.instance_name,
9826
                                   errors.ECODE_STATE)
9827

    
9828
      if faulty_primary:
9829
        self.disks = faulty_primary
9830
        self.target_node = instance.primary_node
9831
        self.other_node = secondary_node
9832
        check_nodes = [self.target_node, self.other_node]
9833
      elif faulty_secondary:
9834
        self.disks = faulty_secondary
9835
        self.target_node = secondary_node
9836
        self.other_node = instance.primary_node
9837
        check_nodes = [self.target_node, self.other_node]
9838
      else:
9839
        self.disks = []
9840
        check_nodes = []
9841

    
9842
    else:
9843
      # Non-automatic modes
9844
      if self.mode == constants.REPLACE_DISK_PRI:
9845
        self.target_node = instance.primary_node
9846
        self.other_node = secondary_node
9847
        check_nodes = [self.target_node, self.other_node]
9848

    
9849
      elif self.mode == constants.REPLACE_DISK_SEC:
9850
        self.target_node = secondary_node
9851
        self.other_node = instance.primary_node
9852
        check_nodes = [self.target_node, self.other_node]
9853

    
9854
      elif self.mode == constants.REPLACE_DISK_CHG:
9855
        self.new_node = remote_node
9856
        self.other_node = instance.primary_node
9857
        self.target_node = secondary_node
9858
        check_nodes = [self.new_node, self.other_node]
9859

    
9860
        _CheckNodeNotDrained(self.lu, remote_node)
9861
        _CheckNodeVmCapable(self.lu, remote_node)
9862

    
9863
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9864
        assert old_node_info is not None
9865
        if old_node_info.offline and not self.early_release:
9866
          # doesn't make sense to delay the release
9867
          self.early_release = True
9868
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9869
                          " early-release mode", secondary_node)
9870

    
9871
      else:
9872
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9873
                                     self.mode)
9874

    
9875
      # If not specified all disks should be replaced
9876
      if not self.disks:
9877
        self.disks = range(len(self.instance.disks))
9878

    
9879
    for node in check_nodes:
9880
      _CheckNodeOnline(self.lu, node)
9881

    
9882
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9883
                                                          self.other_node,
9884
                                                          self.target_node]
9885
                              if node_name is not None)
9886

    
9887
    # Release unneeded node and node resource locks
9888
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9889
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
9890

    
9891
    # Release any owned node group
9892
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9893
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9894

    
9895
    # Check whether disks are valid
9896
    for disk_idx in self.disks:
9897
      instance.FindDisk(disk_idx)
9898

    
9899
    # Get secondary node IP addresses
9900
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9901
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9902

    
9903
  def Exec(self, feedback_fn):
9904
    """Execute disk replacement.
9905

9906
    This dispatches the disk replacement to the appropriate handler.
9907

9908
    """
9909
    if self.delay_iallocator:
9910
      self._CheckPrereq2()
9911

    
9912
    if __debug__:
9913
      # Verify owned locks before starting operation
9914
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9915
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9916
          ("Incorrect node locks, owning %s, expected %s" %
9917
           (owned_nodes, self.node_secondary_ip.keys()))
9918
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
9919
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
9920

    
9921
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9922
      assert list(owned_instances) == [self.instance_name], \
9923
          "Instance '%s' not locked" % self.instance_name
9924

    
9925
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9926
          "Should not own any node group lock at this point"
9927

    
9928
    if not self.disks:
9929
      feedback_fn("No disks need replacement")
9930
      return
9931

    
9932
    feedback_fn("Replacing disk(s) %s for %s" %
9933
                (utils.CommaJoin(self.disks), self.instance.name))
9934

    
9935
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
9936

    
9937
    # Activate the instance disks if we're replacing them on a down instance
9938
    if activate_disks:
9939
      _StartInstanceDisks(self.lu, self.instance, True)
9940

    
9941
    try:
9942
      # Should we replace the secondary node?
9943
      if self.new_node is not None:
9944
        fn = self._ExecDrbd8Secondary
9945
      else:
9946
        fn = self._ExecDrbd8DiskOnly
9947

    
9948
      result = fn(feedback_fn)
9949
    finally:
9950
      # Deactivate the instance disks if we're replacing them on a
9951
      # down instance
9952
      if activate_disks:
9953
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9954

    
9955
    assert not self.lu.owned_locks(locking.LEVEL_NODE)
9956

    
9957
    if __debug__:
9958
      # Verify owned locks
9959
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
9960
      nodes = frozenset(self.node_secondary_ip)
9961
      assert ((self.early_release and not owned_nodes) or
9962
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9963
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9964
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9965

    
9966
    return result
9967

    
9968
  def _CheckVolumeGroup(self, nodes):
9969
    self.lu.LogInfo("Checking volume groups")
9970

    
9971
    vgname = self.cfg.GetVGName()
9972

    
9973
    # Make sure volume group exists on all involved nodes
9974
    results = self.rpc.call_vg_list(nodes)
9975
    if not results:
9976
      raise errors.OpExecError("Can't list volume groups on the nodes")
9977

    
9978
    for node in nodes:
9979
      res = results[node]
9980
      res.Raise("Error checking node %s" % node)
9981
      if vgname not in res.payload:
9982
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9983
                                 (vgname, node))
9984

    
9985
  def _CheckDisksExistence(self, nodes):
9986
    # Check disk existence
9987
    for idx, dev in enumerate(self.instance.disks):
9988
      if idx not in self.disks:
9989
        continue
9990

    
9991
      for node in nodes:
9992
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9993
        self.cfg.SetDiskID(dev, node)
9994

    
9995
        result = self.rpc.call_blockdev_find(node, dev)
9996

    
9997
        msg = result.fail_msg
9998
        if msg or not result.payload:
9999
          if not msg:
10000
            msg = "disk not found"
10001
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10002
                                   (idx, node, msg))
10003

    
10004
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10005
    for idx, dev in enumerate(self.instance.disks):
10006
      if idx not in self.disks:
10007
        continue
10008

    
10009
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10010
                      (idx, node_name))
10011

    
10012
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10013
                                   ldisk=ldisk):
10014
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10015
                                 " replace disks for instance %s" %
10016
                                 (node_name, self.instance.name))
10017

    
10018
  def _CreateNewStorage(self, node_name):
10019
    """Create new storage on the primary or secondary node.
10020

10021
    This is only used for same-node replaces, not for changing the
10022
    secondary node, hence we don't want to modify the existing disk.
10023

10024
    """
10025
    iv_names = {}
10026

    
10027
    for idx, dev in enumerate(self.instance.disks):
10028
      if idx not in self.disks:
10029
        continue
10030

    
10031
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10032

    
10033
      self.cfg.SetDiskID(dev, node_name)
10034

    
10035
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10036
      names = _GenerateUniqueNames(self.lu, lv_names)
10037

    
10038
      vg_data = dev.children[0].logical_id[0]
10039
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10040
                             logical_id=(vg_data, names[0]))
10041
      vg_meta = dev.children[1].logical_id[0]
10042
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10043
                             logical_id=(vg_meta, names[1]))
10044

    
10045
      new_lvs = [lv_data, lv_meta]
10046
      old_lvs = [child.Copy() for child in dev.children]
10047
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10048

    
10049
      # we pass force_create=True to force the LVM creation
10050
      for new_lv in new_lvs:
10051
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10052
                        _GetInstanceInfoText(self.instance), False)
10053

    
10054
    return iv_names
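  # For disk/0 the loop above ends up with something like this (VG and LV
  # names are illustrative; the unique prefix comes from
  # _GenerateUniqueNames):
  #
  #   lv_data: LD_LV in VG "xenvg", name "<uuid>.disk0_data", size=dev.size
  #   lv_meta: LD_LV in VG "xenvg", name "<uuid>.disk0_meta",
  #            size=DRBD_META_SIZE
  #   iv_names["disk/0"] = (dev, old_lvs, new_lvs)
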
  def _CheckDevices(self, node_name, iv_names):
10057
    for name, (dev, _, _) in iv_names.iteritems():
10058
      self.cfg.SetDiskID(dev, node_name)
10059

    
10060
      result = self.rpc.call_blockdev_find(node_name, dev)
10061

    
10062
      msg = result.fail_msg
10063
      if msg or not result.payload:
10064
        if not msg:
10065
          msg = "disk not found"
10066
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
10067
                                 (name, msg))
10068

    
10069
      if result.payload.is_degraded:
10070
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
10071

    
10072
  def _RemoveOldStorage(self, node_name, iv_names):
10073
    for name, (_, old_lvs, _) in iv_names.iteritems():
10074
      self.lu.LogInfo("Remove logical volumes for %s" % name)
10075

    
10076
      for lv in old_lvs:
10077
        self.cfg.SetDiskID(lv, node_name)
10078

    
10079
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10080
        if msg:
10081
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
10082
                             hint="remove unused LVs manually")
10083

    
10084
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10085
    """Replace a disk on the primary or secondary for DRBD 8.
10086

10087
    The algorithm for replace is quite complicated:
10088

10089
      1. for each disk to be replaced:
10090

10091
        1. create new LVs on the target node with unique names
10092
        1. detach old LVs from the drbd device
10093
        1. rename old LVs to name_replaced.<time_t>
10094
        1. rename new LVs to old LVs
10095
        1. attach the new LVs (with the old names now) to the drbd device
10096

10097
      1. wait for sync across all devices
10098

10099
      1. for each modified disk:
10100

10101
        1. remove old LVs (which have the name name_replaces.<time_t>)
10102

10103
    Failures are not very well handled.
10104

10105
    """
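    # Condensed sketch of the per-disk LV swap performed below for a single
    # disk (names illustrative, error handling omitted):
    #
    #   rename_old_to_new = [(old_lv,
    #                         ("xenvg", "<uuid>.disk0_data_replaced-<ts>"))]
    #   rename_new_to_old = [(new_lv, old_lv.physical_id)]
    #
    # The old LVs are pushed aside under a temporary name, the new LVs take
    # over the old names, and only after a successful sync are the renamed
    # old LVs removed.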
    steps_total = 6
10107

    
10108
    # Step: check device activation
10109
    self.lu.LogStep(1, steps_total, "Check device existence")
10110
    self._CheckDisksExistence([self.other_node, self.target_node])
10111
    self._CheckVolumeGroup([self.target_node, self.other_node])
10112

    
10113
    # Step: check other node consistency
10114
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10115
    self._CheckDisksConsistency(self.other_node,
10116
                                self.other_node == self.instance.primary_node,
10117
                                False)
10118

    
10119
    # Step: create new storage
10120
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10121
    iv_names = self._CreateNewStorage(self.target_node)
10122

    
10123
    # Step: for each lv, detach+rename*2+attach
10124
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10125
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10126
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10127

    
10128
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10129
                                                     old_lvs)
10130
      result.Raise("Can't detach drbd from local storage on node"
10131
                   " %s for device %s" % (self.target_node, dev.iv_name))
10132
      #dev.children = []
10133
      #cfg.Update(instance)
10134

    
10135
      # ok, we created the new LVs, so now we know we have the needed
10136
      # storage; as such, we proceed on the target node to rename
10137
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10138
      # using the assumption that logical_id == physical_id (which in
10139
      # turn is the unique_id on that node)
10140

    
10141
      # FIXME(iustin): use a better name for the replaced LVs
10142
      temp_suffix = int(time.time())
10143
      ren_fn = lambda d, suff: (d.physical_id[0],
10144
                                d.physical_id[1] + "_replaced-%s" % suff)
10145

    
10146
      # Build the rename list based on what LVs exist on the node
10147
      rename_old_to_new = []
10148
      for to_ren in old_lvs:
10149
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10150
        if not result.fail_msg and result.payload:
10151
          # device exists
10152
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10153

    
10154
      self.lu.LogInfo("Renaming the old LVs on the target node")
10155
      result = self.rpc.call_blockdev_rename(self.target_node,
10156
                                             rename_old_to_new)
10157
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10158

    
10159
      # Now we rename the new LVs to the old LVs
10160
      self.lu.LogInfo("Renaming the new LVs on the target node")
10161
      rename_new_to_old = [(new, old.physical_id)
10162
                           for old, new in zip(old_lvs, new_lvs)]
10163
      result = self.rpc.call_blockdev_rename(self.target_node,
10164
                                             rename_new_to_old)
10165
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10166

    
10167
      # Intermediate steps of in memory modifications
10168
      for old, new in zip(old_lvs, new_lvs):
10169
        new.logical_id = old.logical_id
10170
        self.cfg.SetDiskID(new, self.target_node)
10171

    
10172
      # We need to modify old_lvs so that removal later removes the
10173
      # right LVs, not the newly added ones; note that old_lvs is a
10174
      # copy here
10175
      for disk in old_lvs:
10176
        disk.logical_id = ren_fn(disk, temp_suffix)
10177
        self.cfg.SetDiskID(disk, self.target_node)
10178

    
10179
      # Now that the new lvs have the old name, we can add them to the device
10180
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10181
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10182
                                                  new_lvs)
10183
      msg = result.fail_msg
10184
      if msg:
10185
        for new_lv in new_lvs:
10186
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10187
                                               new_lv).fail_msg
10188
          if msg2:
10189
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10190
                               hint=("cleanup manually the unused logical"
10191
                                     "volumes"))
10192
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10193

    
10194
    cstep = 5
10195
    if self.early_release:
10196
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10197
      cstep += 1
10198
      self._RemoveOldStorage(self.target_node, iv_names)
10199
      # TODO: Check if releasing locks early still makes sense
10200
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10201
    else:
10202
      # Release all resource locks except those used by the instance
10203
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10204
                    keep=self.node_secondary_ip.keys())
10205

    
10206
    # Release all node locks while waiting for sync
10207
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10208

    
10209
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10210
    # shutdown in the caller into consideration.
10211

    
10212
    # Wait for sync
10213
    # This can fail as the old devices are degraded and _WaitForSync
10214
    # does a combined result over all disks, so we don't check its return value
10215
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10216
    cstep += 1
10217
    _WaitForSync(self.lu, self.instance)
10218

    
10219
    # Check all devices manually
10220
    self._CheckDevices(self.instance.primary_node, iv_names)
10221

    
10222
    # Step: remove old storage
10223
    if not self.early_release:
10224
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10225
      cstep += 1
10226
      self._RemoveOldStorage(self.target_node, iv_names)
10227

    
10228
  def _ExecDrbd8Secondary(self, feedback_fn):
10229
    """Replace the secondary node for DRBD 8.
10230

10231
    The algorithm for replace is quite complicated:
10232
      - for all disks of the instance:
10233
        - create new LVs on the new node with same names
10234
        - shutdown the drbd device on the old secondary
10235
        - disconnect the drbd network on the primary
10236
        - create the drbd device on the new secondary
10237
        - network attach the drbd on the primary, using an artifice:
10238
          the drbd code for Attach() will connect to the network if it
10239
          finds a device which is connected to the good local disks but
10240
          not network enabled
10241
      - wait for sync across all devices
10242
      - remove all disks from the old secondary
10243

10244
    Failures are not very well handled.
10245

10246
    """
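    # The DRBD logical_id handled below is a 6-tuple; for one disk the IDs
    # built in step 4 look roughly like this (values illustrative):
    #
    #   dev.logical_id = (node_a, node_b, port, minor_a, minor_b, secret)
    #   new_alone_id   = (primary, new_node, None, p_minor, new_minor, secret)
    #   new_net_id     = (primary, new_node, port, p_minor, new_minor, secret)
    #
    # new_alone_id activates the device on the new node without networking;
    # new_net_id is what ends up in the configuration for the final attach.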
    steps_total = 6
10248

    
10249
    pnode = self.instance.primary_node
10250

    
10251
    # Step: check device activation
10252
    self.lu.LogStep(1, steps_total, "Check device existence")
10253
    self._CheckDisksExistence([self.instance.primary_node])
10254
    self._CheckVolumeGroup([self.instance.primary_node])
10255

    
10256
    # Step: check other node consistency
10257
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10258
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10259

    
10260
    # Step: create new storage
10261
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10262
    for idx, dev in enumerate(self.instance.disks):
10263
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10264
                      (self.new_node, idx))
10265
      # we pass force_create=True to force LVM creation
10266
      for new_lv in dev.children:
10267
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10268
                        _GetInstanceInfoText(self.instance), False)
10269

    
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
10272
    # error and the success paths
10273
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10274
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10275
                                         for dev in self.instance.disks],
10276
                                        self.instance.name)
10277
    logging.debug("Allocated minors %r", minors)
10278

    
10279
    iv_names = {}
10280
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10281
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10282
                      (self.new_node, idx))
10283
      # create new devices on new_node; note that we create two IDs:
10284
      # one without port, so the drbd will be activated without
10285
      # networking information on the new node at this stage, and one
10286
      # with network, for the latter activation in step 4
10287
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10288
      if self.instance.primary_node == o_node1:
10289
        p_minor = o_minor1
10290
      else:
10291
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10292
        p_minor = o_minor2
10293

    
10294
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10295
                      p_minor, new_minor, o_secret)
10296
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10297
                    p_minor, new_minor, o_secret)
10298

    
10299
      iv_names[idx] = (dev, dev.children, new_net_id)
10300
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10301
                    new_net_id)
10302
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10303
                              logical_id=new_alone_id,
10304
                              children=dev.children,
10305
                              size=dev.size)
10306
      try:
10307
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10308
                              _GetInstanceInfoText(self.instance), False)
10309
      except errors.GenericError:
10310
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10311
        raise
10312

    
10313
    # We have new devices, shutdown the drbd on the old secondary
10314
    for idx, dev in enumerate(self.instance.disks):
10315
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10316
      self.cfg.SetDiskID(dev, self.target_node)
10317
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10318
      if msg:
10319
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10320
                           "node: %s" % (idx, msg),
10321
                           hint=("Please cleanup this device manually as"
10322
                                 " soon as possible"))
10323

    
10324
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10325
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10326
                                               self.instance.disks)[pnode]
10327

    
10328
    msg = result.fail_msg
10329
    if msg:
10330
      # detaches didn't succeed (unlikely)
10331
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10332
      raise errors.OpExecError("Can't detach the disks from the network on"
10333
                               " old node: %s" % (msg,))
10334

    
10335
    # if we managed to detach at least one, we update all the disks of
10336
    # the instance to point to the new secondary
10337
    self.lu.LogInfo("Updating instance configuration")
10338
    for dev, _, new_logical_id in iv_names.itervalues():
10339
      dev.logical_id = new_logical_id
10340
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10341

    
10342
    self.cfg.Update(self.instance, feedback_fn)
10343

    
10344
    # Release all node locks (the configuration has been updated)
10345
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10346

    
10347
    # and now perform the drbd attach
10348
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10349
                    " (standalone => connected)")
10350
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10351
                                            self.new_node],
10352
                                           self.node_secondary_ip,
10353
                                           self.instance.disks,
10354
                                           self.instance.name,
10355
                                           False)
10356
    for to_node, to_result in result.items():
10357
      msg = to_result.fail_msg
10358
      if msg:
10359
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10360
                           to_node, msg,
10361
                           hint=("please do a gnt-instance info to see the"
10362
                                 " status of disks"))
10363
    cstep = 5
10364
    if self.early_release:
10365
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10366
      cstep += 1
10367
      self._RemoveOldStorage(self.target_node, iv_names)
10368
      # TODO: Check if releasing locks early still makes sense
10369
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10370
    else:
10371
      # Release all resource locks except those used by the instance
10372
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10373
                    keep=self.node_secondary_ip.keys())
10374

    
10375
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10376
    # shutdown in the caller into consideration.
10377

    
10378
    # Wait for sync
10379
    # This can fail as the old devices are degraded and _WaitForSync
10380
    # does a combined result over all disks, so we don't check its return value
10381
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10382
    cstep += 1
10383
    _WaitForSync(self.lu, self.instance)
10384

    
10385
    # Check all devices manually
10386
    self._CheckDevices(self.instance.primary_node, iv_names)
10387

    
10388
    # Step: remove old storage
10389
    if not self.early_release:
10390
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10391
      self._RemoveOldStorage(self.target_node, iv_names)
10392

    
10393

    
10394
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
10457
  """Evacuates instances off a list of nodes.
10458

10459
  """
10460
  REQ_BGL = False
10461

    
10462
  def CheckArguments(self):
10463
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10464

    
10465
  def ExpandNames(self):
10466
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10467

    
10468
    if self.op.remote_node is not None:
10469
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10470
      assert self.op.remote_node
10471

    
10472
      if self.op.remote_node == self.op.node_name:
10473
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10474
                                   " secondary node", errors.ECODE_INVAL)
10475

    
10476
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10477
        raise errors.OpPrereqError("Without the use of an iallocator only"
10478
                                   " secondary instances can be evacuated",
10479
                                   errors.ECODE_INVAL)
10480

    
10481
    # Declare locks
10482
    self.share_locks = _ShareAll()
10483
    self.needed_locks = {
10484
      locking.LEVEL_INSTANCE: [],
10485
      locking.LEVEL_NODEGROUP: [],
10486
      locking.LEVEL_NODE: [],
10487
      }
10488

    
10489
    if self.op.remote_node is None:
10490
      # Iallocator will choose any node(s) in the same group
10491
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10492
    else:
10493
      group_nodes = frozenset([self.op.remote_node])
10494

    
10495
    # Determine nodes to be locked
10496
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10497

    
10498
  def _DetermineInstances(self):
10499
    """Builds list of instances to operate on.
10500

10501
    """
10502
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10503

    
10504
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10505
      # Primary instances only
10506
      inst_fn = _GetNodePrimaryInstances
10507
      assert self.op.remote_node is None, \
10508
        "Evacuating primary instances requires iallocator"
10509
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10510
      # Secondary instances only
10511
      inst_fn = _GetNodeSecondaryInstances
10512
    else:
10513
      # All instances
10514
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10515
      inst_fn = _GetNodeInstances
10516

    
10517
    return inst_fn(self.cfg, self.op.node_name)
10518

    
10519
  def DeclareLocks(self, level):
10520
    if level == locking.LEVEL_INSTANCE:
10521
      # Lock instances optimistically, needs verification once node and group
10522
      # locks have been acquired
10523
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10524
        set(i.name for i in self._DetermineInstances())
10525

    
10526
    elif level == locking.LEVEL_NODEGROUP:
10527
      # Lock node groups optimistically, needs verification once nodes have
10528
      # been acquired
10529
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10530
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10531

    
10532
    elif level == locking.LEVEL_NODE:
10533
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10534

    
10535
  def CheckPrereq(self):
10536
    # Verify locks
10537
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10538
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10539
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10540

    
10541
    assert owned_nodes == self.lock_nodes
10542

    
10543
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10544
    if owned_groups != wanted_groups:
10545
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10546
                               " current groups are '%s', used to be '%s'" %
10547
                               (utils.CommaJoin(wanted_groups),
10548
                                utils.CommaJoin(owned_groups)))
10549

    
10550
    # Determine affected instances
10551
    self.instances = self._DetermineInstances()
10552
    self.instance_names = [i.name for i in self.instances]
10553

    
10554
    if set(self.instance_names) != owned_instances:
10555
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10556
                               " were acquired, current instances are '%s',"
10557
                               " used to be '%s'" %
10558
                               (self.op.node_name,
10559
                                utils.CommaJoin(self.instance_names),
10560
                                utils.CommaJoin(owned_instances)))
10561

    
10562
    if self.instance_names:
10563
      self.LogInfo("Evacuating instances from node '%s': %s",
10564
                   self.op.node_name,
10565
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10566
    else:
10567
      self.LogInfo("No instances to evacuate from node '%s'",
10568
                   self.op.node_name)
10569

    
10570
    if self.op.remote_node is not None:
10571
      for i in self.instances:
10572
        if i.primary_node == self.op.remote_node:
10573
          raise errors.OpPrereqError("Node %s is the primary node of"
10574
                                     " instance %s, cannot use it as"
10575
                                     " secondary" %
10576
                                     (self.op.remote_node, i.name),
10577
                                     errors.ECODE_INVAL)
10578

    
10579
  def Exec(self, feedback_fn):
10580
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10581

    
10582
    if not self.instance_names:
10583
      # No instances to evacuate
10584
      jobs = []
10585

    
10586
    elif self.op.iallocator is not None:
10587
      # TODO: Implement relocation to other group
10588
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10589
                       evac_mode=self.op.mode,
10590
                       instances=list(self.instance_names))
10591

    
10592
      ial.Run(self.op.iallocator)
10593

    
10594
      if not ial.success:
10595
        raise errors.OpPrereqError("Can't compute node evacuation using"
10596
                                   " iallocator '%s': %s" %
10597
                                   (self.op.iallocator, ial.info),
10598
                                   errors.ECODE_NORES)
10599

    
10600
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10601

    
10602
    elif self.op.remote_node is not None:
10603
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10604
      jobs = [
10605
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10606
                                        remote_node=self.op.remote_node,
10607
                                        disks=[],
10608
                                        mode=constants.REPLACE_DISK_CHG,
10609
                                        early_release=self.op.early_release)]
10610
        for instance_name in self.instance_names
10611
        ]
10612

    
10613
    else:
10614
      raise errors.ProgrammerError("No iallocator or remote node")
10615

    
10616
    return ResultWithJobs(jobs)
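# The remote-node branch of LUNodeEvacuate.Exec above builds one
# single-opcode job per instance, roughly like this (sketch, names
# illustrative):
#
#   jobs = [
#     [opcodes.OpInstanceReplaceDisks(instance_name="inst1",
#                                     remote_node="node9",
#                                     disks=[],
#                                     mode=constants.REPLACE_DISK_CHG,
#                                     early_release=False)],
#     ]
#
# ResultWithJobs then makes mcpu submit each inner list as a separate job.

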
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    lu.LogWarning("Unable to evacuate instances %s",
                  utils.CommaJoin("%s (%s)" % (name, reason)
                                  for (name, reason) in failed))

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
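
# Sketch of the iallocator result consumed by _LoadNodeEvacResult above
# (instance/node/group names illustrative):
#
#   alloc_result = (
#     [("inst1", "group2", ["node3"])],              # moved
#     [("inst2", "instance has no secondary node")], # failed
#     [["<serialized opcode>", ...], ...],           # jobs
#     )
#
# Each inner job list is deserialized with opcodes.OpCode.LoadOpCode and, if
# the opcode supports it, gets early_release set via _SetOpEarlyRelease.

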
class LUInstanceGrowDisk(LogicalUnit):
10676
  """Grow a disk of an instance.
10677

10678
  """
10679
  HPATH = "disk-grow"
10680
  HTYPE = constants.HTYPE_INSTANCE
10681
  REQ_BGL = False
10682

    
10683
  def ExpandNames(self):
10684
    self._ExpandAndLockInstance()
10685
    self.needed_locks[locking.LEVEL_NODE] = []
10686
    self.needed_locks[locking.LEVEL_NODE_RES] = []
10687
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10688

    
10689
  def DeclareLocks(self, level):
10690
    if level == locking.LEVEL_NODE:
10691
      self._LockInstancesNodes()
10692
    elif level == locking.LEVEL_NODE_RES:
10693
      # Copy node locks
10694
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10695
        self.needed_locks[locking.LEVEL_NODE][:]
10696

    
10697
  def BuildHooksEnv(self):
10698
    """Build hooks env.
10699

10700
    This runs on the master, the primary and all the secondaries.
10701

10702
    """
10703
    env = {
10704
      "DISK": self.op.disk,
10705
      "AMOUNT": self.op.amount,
10706
      }
10707
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10708
    return env
10709

    
10710
  def BuildHooksNodes(self):
10711
    """Build hooks nodes.
10712

10713
    """
10714
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10715
    return (nl, nl)
10716

    
10717
  def CheckPrereq(self):
10718
    """Check prerequisites.
10719

10720
    This checks that the instance is in the cluster.
10721

10722
    """
10723
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10724
    assert instance is not None, \
10725
      "Cannot retrieve locked instance %s" % self.op.instance_name
10726
    nodenames = list(instance.all_nodes)
10727
    for node in nodenames:
10728
      _CheckNodeOnline(self, node)
10729

    
10730
    self.instance = instance
10731

    
10732
    if instance.disk_template not in constants.DTS_GROWABLE:
10733
      raise errors.OpPrereqError("Instance's disk layout does not support"
10734
                                 " growing", errors.ECODE_INVAL)
10735

    
10736
    self.disk = instance.FindDisk(self.op.disk)
10737

    
10738
    if instance.disk_template not in (constants.DT_FILE,
10739
                                      constants.DT_SHARED_FILE):
10740
      # TODO: check the free disk space for file, when that feature will be
10741
      # supported
10742
      _CheckNodesFreeDiskPerVG(self, nodenames,
10743
                               self.disk.ComputeGrowth(self.op.amount))
10744

    
10745
  def Exec(self, feedback_fn):
10746
    """Execute disk grow.
10747

10748
    """
10749
    instance = self.instance
10750
    disk = self.disk
10751

    
10752
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10753
    assert (self.owned_locks(locking.LEVEL_NODE) ==
10754
            self.owned_locks(locking.LEVEL_NODE_RES))
10755

    
10756
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10757
    if not disks_ok:
10758
      raise errors.OpExecError("Cannot activate block device to grow")
10759

    
10760
    feedback_fn("Growing disk %s of instance '%s' by %s" %
10761
                (self.op.disk, instance.name,
10762
                 utils.FormatUnit(self.op.amount, "h")))
10763

    
10764
    # First run all grow ops in dry-run mode
10765
    for node in instance.all_nodes:
10766
      self.cfg.SetDiskID(disk, node)
10767
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10768
      result.Raise("Grow request failed to node %s" % node)
10769

    
10770
    # We know that (as far as we can test) operations across different
10771
    # nodes will succeed, time to run it for real
10772
    for node in instance.all_nodes:
10773
      self.cfg.SetDiskID(disk, node)
10774
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10775
      result.Raise("Grow request failed to node %s" % node)
10776

    
10777
      # TODO: Rewrite code to work properly
10778
      # DRBD goes into sync mode for a short amount of time after executing the
10779
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10780
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10781
      # time is a work-around.
10782
      time.sleep(5)
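    # The two loops above implement a simple two-phase protocol per node
    # (sketch):
    #
    #   self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)   # dry run
    #   self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)  # grow
    #
    # Running the dry-run pass over all nodes first makes a partial grow
    # (one node resized, the other not) much less likely.
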
    disk.RecordGrow(self.op.amount)
10785
    self.cfg.Update(instance, feedback_fn)
10786

    
10787
    # Changes have been recorded, release node lock
10788
    _ReleaseLocks(self, locking.LEVEL_NODE)
10789

    
10790
    # Downgrade lock while waiting for sync
10791
    self.glm.downgrade(locking.LEVEL_INSTANCE)
10792

    
10793
    if self.op.wait_for_sync:
10794
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10795
      if disk_abort:
10796
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10797
                             " status; please check the instance")
10798
      if instance.admin_state != constants.ADMINST_UP:
10799
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10800
    elif instance.admin_state != constants.ADMINST_UP:
10801
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10802
                           " not supposed to be running because no wait for"
10803
                           " sync mode was requested")
10804

    
10805
    assert self.owned_locks(locking.LEVEL_NODE_RES)
10806
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10807

    
10808

    
10809
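# Note for readers of LUInstanceQueryData below: the per-disk "pstatus" and
# "sstatus" values are the tuples built by _ComputeBlockdevStatus, i.e.
# (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
# ldisk_status).  A purely illustrative, hypothetical value could look like
# ("/dev/drbd0", 147, 0, 100.0, None, False, constants.LDS_OKAY).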
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


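# Illustrative sketch (not an exhaustive reference, values are hypothetical) of
# the modification lists validated by LUInstanceSetParams.CheckArguments below:
# self.op.disks and self.op.nics are lists of (op, params) pairs, where op is
# constants.DDM_ADD, constants.DDM_REMOVE or the integer index of an existing
# device, e.g.:
#
#   disks=[(constants.DDM_ADD, {constants.IDISK_SIZE: 1024,
#                               constants.IDISK_MODE: constants.DISK_RDWR})]
#   nics=[(0, {constants.INIC_IP: "198.51.100.10"})]
#
# The accepted keys are constrained by constants.IDISK_PARAMS_TYPES and
# constants.INIC_PARAMS_TYPES, which CheckArguments enforces.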
class LUInstanceSetParams(LogicalUnit):
10976
  """Modifies an instances's parameters.
10977

10978
  """
10979
  HPATH = "instance-modify"
10980
  HTYPE = constants.HTYPE_INSTANCE
10981
  REQ_BGL = False
10982

    
10983
  def CheckArguments(self):
10984
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10985
            self.op.hvparams or self.op.beparams or self.op.os_name or
10986
            self.op.online_inst or self.op.offline_inst):
10987
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10988

    
10989
    if self.op.hvparams:
10990
      _CheckGlobalHvParams(self.op.hvparams)
10991

    
10992
    # Disk validation
10993
    disk_addremove = 0
10994
    for disk_op, disk_dict in self.op.disks:
10995
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10996
      if disk_op == constants.DDM_REMOVE:
10997
        disk_addremove += 1
10998
        continue
10999
      elif disk_op == constants.DDM_ADD:
11000
        disk_addremove += 1
11001
      else:
11002
        if not isinstance(disk_op, int):
11003
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11004
        if not isinstance(disk_dict, dict):
11005
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11006
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11007

    
11008
      if disk_op == constants.DDM_ADD:
11009
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11010
        if mode not in constants.DISK_ACCESS_SET:
11011
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11012
                                     errors.ECODE_INVAL)
11013
        size = disk_dict.get(constants.IDISK_SIZE, None)
11014
        if size is None:
11015
          raise errors.OpPrereqError("Required disk parameter size missing",
11016
                                     errors.ECODE_INVAL)
11017
        try:
11018
          size = int(size)
11019
        except (TypeError, ValueError), err:
11020
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11021
                                     str(err), errors.ECODE_INVAL)
11022
        disk_dict[constants.IDISK_SIZE] = size
11023
      else:
11024
        # modification of disk
11025
        if constants.IDISK_SIZE in disk_dict:
11026
          raise errors.OpPrereqError("Disk size change not possible, use"
11027
                                     " grow-disk", errors.ECODE_INVAL)
11028

    
11029
    if disk_addremove > 1:
11030
      raise errors.OpPrereqError("Only one disk add or remove operation"
11031
                                 " supported at a time", errors.ECODE_INVAL)
11032

    
11033
    if self.op.disks and self.op.disk_template is not None:
11034
      raise errors.OpPrereqError("Disk template conversion and other disk"
11035
                                 " changes not supported at the same time",
11036
                                 errors.ECODE_INVAL)
11037

    
11038
    if (self.op.disk_template and
11039
        self.op.disk_template in constants.DTS_INT_MIRROR and
11040
        self.op.remote_node is None):
11041
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
11042
                                 " one requires specifying a secondary node",
11043
                                 errors.ECODE_INVAL)
11044

    
11045
    # NIC validation
11046
    nic_addremove = 0
11047
    for nic_op, nic_dict in self.op.nics:
11048
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11049
      if nic_op == constants.DDM_REMOVE:
11050
        nic_addremove += 1
11051
        continue
11052
      elif nic_op == constants.DDM_ADD:
11053
        nic_addremove += 1
11054
      else:
11055
        if not isinstance(nic_op, int):
11056
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11057
        if not isinstance(nic_dict, dict):
11058
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11059
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11060

    
11061
      # nic_dict should be a dict
11062
      nic_ip = nic_dict.get(constants.INIC_IP, None)
11063
      if nic_ip is not None:
11064
        if nic_ip.lower() == constants.VALUE_NONE:
11065
          nic_dict[constants.INIC_IP] = None
11066
        else:
11067
          if not netutils.IPAddress.IsValid(nic_ip):
11068
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11069
                                       errors.ECODE_INVAL)
11070

    
11071
      nic_bridge = nic_dict.get("bridge", None)
11072
      nic_link = nic_dict.get(constants.INIC_LINK, None)
11073
      if nic_bridge and nic_link:
11074
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11075
                                   " at the same time", errors.ECODE_INVAL)
11076
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11077
        nic_dict["bridge"] = None
11078
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11079
        nic_dict[constants.INIC_LINK] = None
11080

    
11081
      if nic_op == constants.DDM_ADD:
11082
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
11083
        if nic_mac is None:
11084
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11085

    
11086
      if constants.INIC_MAC in nic_dict:
11087
        nic_mac = nic_dict[constants.INIC_MAC]
11088
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11089
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11090

    
11091
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11092
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11093
                                     " modifying an existing nic",
11094
                                     errors.ECODE_INVAL)
11095

    
11096
    if nic_addremove > 1:
11097
      raise errors.OpPrereqError("Only one NIC add or remove operation"
11098
                                 " supported at a time", errors.ECODE_INVAL)
11099

    
11100
  def ExpandNames(self):
11101
    self._ExpandAndLockInstance()
11102
    # Can't even acquire node locks in shared mode as upcoming changes in
11103
    # Ganeti 2.6 will start to modify the node object on disk conversion
11104
    self.needed_locks[locking.LEVEL_NODE] = []
11105
    self.needed_locks[locking.LEVEL_NODE_RES] = []
11106
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11107

    
11108
  def DeclareLocks(self, level):
11109
    if level == locking.LEVEL_NODE:
11110
      self._LockInstancesNodes()
11111
      if self.op.disk_template and self.op.remote_node:
11112
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11113
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11114
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11115
      # Copy node locks
11116
      self.needed_locks[locking.LEVEL_NODE_RES] = \
11117
        self.needed_locks[locking.LEVEL_NODE][:]
11118

    
11119
  def BuildHooksEnv(self):
11120
    """Build hooks env.
11121

11122
    This runs on the master, primary and secondaries.
11123

11124
    """
11125
    args = dict()
11126
    if constants.BE_MEMORY in self.be_new:
11127
      args["memory"] = self.be_new[constants.BE_MEMORY]
11128
    if constants.BE_VCPUS in self.be_new:
11129
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
11130
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11131
    # information at all.
11132
    if self.op.nics:
11133
      args["nics"] = []
11134
      nic_override = dict(self.op.nics)
11135
      for idx, nic in enumerate(self.instance.nics):
11136
        if idx in nic_override:
11137
          this_nic_override = nic_override[idx]
11138
        else:
11139
          this_nic_override = {}
11140
        if constants.INIC_IP in this_nic_override:
11141
          ip = this_nic_override[constants.INIC_IP]
11142
        else:
11143
          ip = nic.ip
11144
        if constants.INIC_MAC in this_nic_override:
11145
          mac = this_nic_override[constants.INIC_MAC]
11146
        else:
11147
          mac = nic.mac
11148
        if idx in self.nic_pnew:
11149
          nicparams = self.nic_pnew[idx]
11150
        else:
11151
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11152
        mode = nicparams[constants.NIC_MODE]
11153
        link = nicparams[constants.NIC_LINK]
11154
        args["nics"].append((ip, mac, mode, link))
11155
      if constants.DDM_ADD in nic_override:
11156
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11157
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11158
        nicparams = self.nic_pnew[constants.DDM_ADD]
11159
        mode = nicparams[constants.NIC_MODE]
11160
        link = nicparams[constants.NIC_LINK]
11161
        args["nics"].append((ip, mac, mode, link))
11162
      elif constants.DDM_REMOVE in nic_override:
11163
        del args["nics"][-1]
11164

    
11165
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11166
    if self.op.disk_template:
11167
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11168

    
11169
    return env
11170

    
11171
  def BuildHooksNodes(self):
11172
    """Build hooks nodes.
11173

11174
    """
11175
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11176
    return (nl, nl)
11177

    
11178
  def CheckPrereq(self):
11179
    """Check prerequisites.
11180

11181
    This only checks the instance list against the existing names.
11182

11183
    """
11184
    # checking the new params on the primary/secondary nodes
11185

    
11186
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11187
    cluster = self.cluster = self.cfg.GetClusterInfo()
11188
    assert self.instance is not None, \
11189
      "Cannot retrieve locked instance %s" % self.op.instance_name
11190
    pnode = instance.primary_node
11191
    nodelist = list(instance.all_nodes)
11192

    
11193
    # OS change
11194
    if self.op.os_name and not self.op.force:
11195
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11196
                      self.op.force_variant)
11197
      instance_os = self.op.os_name
11198
    else:
11199
      instance_os = instance.os
11200

    
11201
    if self.op.disk_template:
11202
      if instance.disk_template == self.op.disk_template:
11203
        raise errors.OpPrereqError("Instance already has disk template %s" %
11204
                                   instance.disk_template, errors.ECODE_INVAL)
11205

    
11206
      if (instance.disk_template,
11207
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11208
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11209
                                   " %s to %s" % (instance.disk_template,
11210
                                                  self.op.disk_template),
11211
                                   errors.ECODE_INVAL)
11212
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11213
                          msg="cannot change disk template")
11214
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11215
        if self.op.remote_node == pnode:
11216
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11217
                                     " as the primary node of the instance" %
11218
                                     self.op.remote_node, errors.ECODE_STATE)
11219
        _CheckNodeOnline(self, self.op.remote_node)
11220
        _CheckNodeNotDrained(self, self.op.remote_node)
11221
        # FIXME: here we assume that the old instance type is DT_PLAIN
11222
        assert instance.disk_template == constants.DT_PLAIN
11223
        disks = [{constants.IDISK_SIZE: d.size,
11224
                  constants.IDISK_VG: d.logical_id[0]}
11225
                 for d in instance.disks]
11226
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11227
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11228

    
11229
    # hvparams processing
11230
    if self.op.hvparams:
11231
      hv_type = instance.hypervisor
11232
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11233
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11234
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11235

    
11236
      # local check
11237
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11238
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11239
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11240
      self.hv_inst = i_hvdict # the new dict (without defaults)
11241
    else:
11242
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11243
                                              instance.hvparams)
11244
      self.hv_new = self.hv_inst = {}
11245

    
11246
    # beparams processing
11247
    if self.op.beparams:
11248
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11249
                                   use_none=True)
11250
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11251
      be_new = cluster.SimpleFillBE(i_bedict)
11252
      self.be_proposed = self.be_new = be_new # the new actual values
11253
      self.be_inst = i_bedict # the new dict (without defaults)
11254
    else:
11255
      self.be_new = self.be_inst = {}
11256
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11257
    be_old = cluster.FillBE(instance)
11258

    
11259
    # CPU param validation -- checking every time a parameter is
11260
    # changed to cover all cases where either CPU mask or vcpus have
11261
    # changed
11262
    if (constants.BE_VCPUS in self.be_proposed and
11263
        constants.HV_CPU_MASK in self.hv_proposed):
11264
      cpu_list = \
11265
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11266
      # Verify mask is consistent with number of vCPUs. Can skip this
11267
      # test if only 1 entry in the CPU mask, which means same mask
11268
      # is applied to all vCPUs.
11269
      if (len(cpu_list) > 1 and
11270
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11271
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11272
                                   " CPU mask [%s]" %
11273
                                   (self.be_proposed[constants.BE_VCPUS],
11274
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11275
                                   errors.ECODE_INVAL)
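      # Worked example (hypothetical values): a multi-CPU mask such as
      # "0-1:3:2,4" parses into three per-vCPU entries and is therefore only
      # consistent with BE_VCPUS == 3, while a single-entry mask like "0-3"
      # is applied to every vCPU and passes for any vCPU count.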
11276

    
11277
      # Only perform this test if a new CPU mask is given
11278
      if constants.HV_CPU_MASK in self.hv_new:
11279
        # Calculate the largest CPU number requested
11280
        max_requested_cpu = max(map(max, cpu_list))
11281
        # Check that all of the instance's nodes have enough physical CPUs to
11282
        # satisfy the requested CPU mask
11283
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11284
                                max_requested_cpu + 1, instance.hypervisor)
11285

    
11286
    # osparams processing
11287
    if self.op.osparams:
11288
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11289
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11290
      self.os_inst = i_osdict # the new dict (without defaults)
11291
    else:
11292
      self.os_inst = {}
11293

    
11294
    self.warn = []
11295

    
11296
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11297
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11298
      mem_check_list = [pnode]
11299
      if be_new[constants.BE_AUTO_BALANCE]:
11300
        # either we changed auto_balance to yes or it was from before
11301
        mem_check_list.extend(instance.secondary_nodes)
11302
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11303
                                                  instance.hypervisor)
11304
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11305
                                         instance.hypervisor)
11306
      pninfo = nodeinfo[pnode]
11307
      msg = pninfo.fail_msg
11308
      if msg:
11309
        # Assume the primary node is unreachable and go ahead
11310
        self.warn.append("Can't get info from primary node %s: %s" %
11311
                         (pnode, msg))
11312
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11313
        self.warn.append("Node data from primary node %s doesn't contain"
11314
                         " free memory information" % pnode)
11315
      elif instance_info.fail_msg:
11316
        self.warn.append("Can't get instance runtime information: %s" %
11317
                        instance_info.fail_msg)
11318
      else:
11319
        if instance_info.payload:
11320
          current_mem = int(instance_info.payload["memory"])
11321
        else:
11322
          # Assume instance not running
11323
          # (there is a slight race condition here, but it's not very probable,
11324
          # and we have no other way to check)
11325
          current_mem = 0
11326
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11327
                    pninfo.payload["memory_free"])
11328
        if miss_mem > 0:
11329
          raise errors.OpPrereqError("This change will prevent the instance"
11330
                                     " from starting, due to %d MB of memory"
11331
                                     " missing on its primary node" % miss_mem,
11332
                                     errors.ECODE_NORES)
11333

    
11334
      if be_new[constants.BE_AUTO_BALANCE]:
11335
        for node, nres in nodeinfo.items():
11336
          if node not in instance.secondary_nodes:
11337
            continue
11338
          nres.Raise("Can't get info from secondary node %s" % node,
11339
                     prereq=True, ecode=errors.ECODE_STATE)
11340
          if not isinstance(nres.payload.get("memory_free", None), int):
11341
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11342
                                       " memory information" % node,
11343
                                       errors.ECODE_STATE)
11344
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11345
            raise errors.OpPrereqError("This change will prevent the instance"
11346
                                       " from failover to its secondary node"
11347
                                       " %s, due to not enough memory" % node,
11348
                                       errors.ECODE_STATE)
11349

    
11350
    # NIC processing
11351
    self.nic_pnew = {}
11352
    self.nic_pinst = {}
11353
    for nic_op, nic_dict in self.op.nics:
11354
      if nic_op == constants.DDM_REMOVE:
11355
        if not instance.nics:
11356
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11357
                                     errors.ECODE_INVAL)
11358
        continue
11359
      if nic_op != constants.DDM_ADD:
11360
        # an existing nic
11361
        if not instance.nics:
11362
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11363
                                     " no NICs" % nic_op,
11364
                                     errors.ECODE_INVAL)
11365
        if nic_op < 0 or nic_op >= len(instance.nics):
11366
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11367
                                     " are 0 to %d" %
11368
                                     (nic_op, len(instance.nics) - 1),
11369
                                     errors.ECODE_INVAL)
11370
        old_nic_params = instance.nics[nic_op].nicparams
11371
        old_nic_ip = instance.nics[nic_op].ip
11372
      else:
11373
        old_nic_params = {}
11374
        old_nic_ip = None
11375

    
11376
      update_params_dict = dict([(key, nic_dict[key])
11377
                                 for key in constants.NICS_PARAMETERS
11378
                                 if key in nic_dict])
11379

    
11380
      if "bridge" in nic_dict:
11381
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11382

    
11383
      new_nic_params = _GetUpdatedParams(old_nic_params,
11384
                                         update_params_dict)
11385
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11386
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11387
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11388
      self.nic_pinst[nic_op] = new_nic_params
11389
      self.nic_pnew[nic_op] = new_filled_nic_params
11390
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11391

    
11392
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11393
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11394
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11395
        if msg:
11396
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11397
          if self.op.force:
11398
            self.warn.append(msg)
11399
          else:
11400
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11401
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11402
        if constants.INIC_IP in nic_dict:
11403
          nic_ip = nic_dict[constants.INIC_IP]
11404
        else:
11405
          nic_ip = old_nic_ip
11406
        if nic_ip is None:
11407
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11408
                                     " on a routed nic", errors.ECODE_INVAL)
11409
      if constants.INIC_MAC in nic_dict:
11410
        nic_mac = nic_dict[constants.INIC_MAC]
11411
        if nic_mac is None:
11412
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11413
                                     errors.ECODE_INVAL)
11414
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11415
          # otherwise generate the mac
11416
          nic_dict[constants.INIC_MAC] = \
11417
            self.cfg.GenerateMAC(self.proc.GetECId())
11418
        else:
11419
          # or validate/reserve the current one
11420
          try:
11421
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11422
          except errors.ReservationError:
11423
            raise errors.OpPrereqError("MAC address %s already in use"
11424
                                       " in cluster" % nic_mac,
11425
                                       errors.ECODE_NOTUNIQUE)
11426

    
11427
    # DISK processing
11428
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11429
      raise errors.OpPrereqError("Disk operations not supported for"
11430
                                 " diskless instances",
11431
                                 errors.ECODE_INVAL)
11432
    for disk_op, _ in self.op.disks:
11433
      if disk_op == constants.DDM_REMOVE:
11434
        if len(instance.disks) == 1:
11435
          raise errors.OpPrereqError("Cannot remove the last disk of"
11436
                                     " an instance", errors.ECODE_INVAL)
11437
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
11438
                            msg="cannot remove disks")
11439

    
11440
      if (disk_op == constants.DDM_ADD and
11441
          len(instance.disks) >= constants.MAX_DISKS):
11442
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11443
                                   " add more" % constants.MAX_DISKS,
11444
                                   errors.ECODE_STATE)
11445
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11446
        # an existing disk
11447
        if disk_op < 0 or disk_op >= len(instance.disks):
11448
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11449
                                     " are 0 to %d" %
11450
                                     (disk_op, len(instance.disks)),
11451
                                     errors.ECODE_INVAL)
11452

    
11453
    # disabling the instance
11454
    if self.op.offline_inst:
11455
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11456
                          msg="cannot change instance state to offline")
11457

    
11458
    # enabling the instance
11459
    if self.op.online_inst:
11460
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11461
                          msg="cannot make instance go online")
11462

    
11463
  def _ConvertPlainToDrbd(self, feedback_fn):
11464
    """Converts an instance from plain to drbd.
11465

11466
    """
11467
    feedback_fn("Converting template to drbd")
11468
    instance = self.instance
11469
    pnode = instance.primary_node
11470
    snode = self.op.remote_node
11471

    
11472
    assert instance.disk_template == constants.DT_PLAIN
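    # Outline of the conversion done below (descriptive comment only): generate
    # a DRBD disk tree over the existing LVs, create the missing meta volumes
    # and the secondary node's volumes, rename the current LVs so they become
    # the DRBD data children, assemble the DRBD devices themselves, update the
    # configuration and finally wait for the initial sync.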
11473

    
11474
    # create a fake disk info for _GenerateDiskTemplate
11475
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11476
                  constants.IDISK_VG: d.logical_id[0]}
11477
                 for d in instance.disks]
11478
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11479
                                      instance.name, pnode, [snode],
11480
                                      disk_info, None, None, 0, feedback_fn)
11481
    info = _GetInstanceInfoText(instance)
11482
    feedback_fn("Creating aditional volumes...")
11483
    # first, create the missing data and meta devices
11484
    for disk in new_disks:
11485
      # unfortunately this is... not too nice
11486
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11487
                            info, True)
11488
      for child in disk.children:
11489
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11490
    # at this stage, all new LVs have been created, we can rename the
11491
    # old ones
11492
    feedback_fn("Renaming original volumes...")
11493
    rename_list = [(o, n.children[0].logical_id)
11494
                   for (o, n) in zip(instance.disks, new_disks)]
11495
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11496
    result.Raise("Failed to rename original LVs")
11497

    
11498
    feedback_fn("Initializing DRBD devices...")
11499
    # all child devices are in place, we can now create the DRBD devices
11500
    for disk in new_disks:
11501
      for node in [pnode, snode]:
11502
        f_create = node == pnode
11503
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11504

    
11505
    # at this point, the instance has been modified
11506
    instance.disk_template = constants.DT_DRBD8
11507
    instance.disks = new_disks
11508
    self.cfg.Update(instance, feedback_fn)
11509

    
11510
    # Release node locks while waiting for sync
11511
    _ReleaseLocks(self, locking.LEVEL_NODE)
11512

    
11513
    # disks are created, waiting for sync
11514
    disk_abort = not _WaitForSync(self, instance,
11515
                                  oneshot=not self.op.wait_for_sync)
11516
    if disk_abort:
11517
      raise errors.OpExecError("There are some degraded disks for"
11518
                               " this instance, please cleanup manually")
11519

    
11520
    # Node resource locks will be released by caller
11521

    
11522
  def _ConvertDrbdToPlain(self, feedback_fn):
11523
    """Converts an instance from drbd to plain.
11524

11525
    """
11526
    instance = self.instance
11527

    
11528
    assert len(instance.secondary_nodes) == 1
11529
    assert instance.disk_template == constants.DT_DRBD8
11530

    
11531
    pnode = instance.primary_node
11532
    snode = instance.secondary_nodes[0]
11533
    feedback_fn("Converting template to plain")
11534

    
11535
    old_disks = instance.disks
11536
    new_disks = [d.children[0] for d in old_disks]
11537

    
11538
    # copy over size and mode
11539
    for parent, child in zip(old_disks, new_disks):
11540
      child.size = parent.size
11541
      child.mode = parent.mode
11542

    
11543
    # update instance structure
11544
    instance.disks = new_disks
11545
    instance.disk_template = constants.DT_PLAIN
11546
    self.cfg.Update(instance, feedback_fn)
11547

    
11548
    # Release locks in case removing disks takes a while
11549
    _ReleaseLocks(self, locking.LEVEL_NODE)
11550

    
11551
    feedback_fn("Removing volumes on the secondary node...")
11552
    for disk in old_disks:
11553
      self.cfg.SetDiskID(disk, snode)
11554
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11555
      if msg:
11556
        self.LogWarning("Could not remove block device %s on node %s,"
11557
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11558

    
11559
    feedback_fn("Removing unneeded volumes on the primary node...")
11560
    for idx, disk in enumerate(old_disks):
11561
      meta = disk.children[1]
11562
      self.cfg.SetDiskID(meta, pnode)
11563
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11564
      if msg:
11565
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11566
                        " continuing anyway: %s", idx, pnode, msg)
11567

    
11568
    # Node resource locks will be released by caller
11569

    
11570
  def Exec(self, feedback_fn):
11571
    """Modifies an instance.
11572

11573
    All parameters take effect only at the next restart of the instance.
11574

11575
    """
11576
    # Process here the warnings from CheckPrereq, as we don't have a
11577
    # feedback_fn there.
11578
    for warn in self.warn:
11579
      feedback_fn("WARNING: %s" % warn)
11580

    
11581
    assert ((self.op.disk_template is None) ^
11582
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11583
      "Not owning any node resource locks"
11584

    
11585
    result = []
11586
    instance = self.instance
11587
    # disk changes
11588
    for disk_op, disk_dict in self.op.disks:
11589
      if disk_op == constants.DDM_REMOVE:
11590
        # remove the last disk
11591
        device = instance.disks.pop()
11592
        device_idx = len(instance.disks)
11593
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11594
          self.cfg.SetDiskID(disk, node)
11595
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11596
          if msg:
11597
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11598
                            " continuing anyway", device_idx, node, msg)
11599
        result.append(("disk/%d" % device_idx, "remove"))
11600
      elif disk_op == constants.DDM_ADD:
11601
        # add a new disk
11602
        if instance.disk_template in (constants.DT_FILE,
11603
                                        constants.DT_SHARED_FILE):
11604
          file_driver, file_path = instance.disks[0].logical_id
11605
          file_path = os.path.dirname(file_path)
11606
        else:
11607
          file_driver = file_path = None
11608
        disk_idx_base = len(instance.disks)
11609
        new_disk = _GenerateDiskTemplate(self,
11610
                                         instance.disk_template,
11611
                                         instance.name, instance.primary_node,
11612
                                         instance.secondary_nodes,
11613
                                         [disk_dict],
11614
                                         file_path,
11615
                                         file_driver,
11616
                                         disk_idx_base, feedback_fn)[0]
11617
        instance.disks.append(new_disk)
11618
        info = _GetInstanceInfoText(instance)
11619

    
11620
        logging.info("Creating volume %s for instance %s",
11621
                     new_disk.iv_name, instance.name)
11622
        # Note: this needs to be kept in sync with _CreateDisks
11623
        #HARDCODE
11624
        for node in instance.all_nodes:
11625
          f_create = node == instance.primary_node
11626
          try:
11627
            _CreateBlockDev(self, node, instance, new_disk,
11628
                            f_create, info, f_create)
11629
          except errors.OpExecError, err:
11630
            self.LogWarning("Failed to create volume %s (%s) on"
11631
                            " node %s: %s",
11632
                            new_disk.iv_name, new_disk, node, err)
11633
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11634
                       (new_disk.size, new_disk.mode)))
11635
      else:
11636
        # change a given disk
11637
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11638
        result.append(("disk.mode/%d" % disk_op,
11639
                       disk_dict[constants.IDISK_MODE]))
11640

    
11641
    if self.op.disk_template:
11642
      if __debug__:
11643
        check_nodes = set(instance.all_nodes)
11644
        if self.op.remote_node:
11645
          check_nodes.add(self.op.remote_node)
11646
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11647
          owned = self.owned_locks(level)
11648
          assert not (check_nodes - owned), \
11649
            ("Not owning the correct locks, owning %r, expected at least %r" %
11650
             (owned, check_nodes))
11651

    
11652
      r_shut = _ShutdownInstanceDisks(self, instance)
11653
      if not r_shut:
11654
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11655
                                 " proceed with disk template conversion")
11656
      mode = (instance.disk_template, self.op.disk_template)
11657
      try:
11658
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11659
      except:
11660
        self.cfg.ReleaseDRBDMinors(instance.name)
11661
        raise
11662
      result.append(("disk_template", self.op.disk_template))
11663

    
11664
      assert instance.disk_template == self.op.disk_template, \
11665
        ("Expected disk template '%s', found '%s'" %
11666
         (self.op.disk_template, instance.disk_template))
11667

    
11668
    # Release node and resource locks if there are any (they might already have
11669
    # been released during disk conversion)
11670
    _ReleaseLocks(self, locking.LEVEL_NODE)
11671
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11672

    
11673
    # NIC changes
11674
    for nic_op, nic_dict in self.op.nics:
11675
      if nic_op == constants.DDM_REMOVE:
11676
        # remove the last nic
11677
        del instance.nics[-1]
11678
        result.append(("nic.%d" % len(instance.nics), "remove"))
11679
      elif nic_op == constants.DDM_ADD:
11680
        # mac and bridge should be set by now
11681
        mac = nic_dict[constants.INIC_MAC]
11682
        ip = nic_dict.get(constants.INIC_IP, None)
11683
        nicparams = self.nic_pinst[constants.DDM_ADD]
11684
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11685
        instance.nics.append(new_nic)
11686
        result.append(("nic.%d" % (len(instance.nics) - 1),
11687
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11688
                       (new_nic.mac, new_nic.ip,
11689
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11690
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11691
                       )))
11692
      else:
11693
        for key in (constants.INIC_MAC, constants.INIC_IP):
11694
          if key in nic_dict:
11695
            setattr(instance.nics[nic_op], key, nic_dict[key])
11696
        if nic_op in self.nic_pinst:
11697
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11698
        for key, val in nic_dict.iteritems():
11699
          result.append(("nic.%s/%d" % (key, nic_op), val))
11700

    
11701
    # hvparams changes
11702
    if self.op.hvparams:
11703
      instance.hvparams = self.hv_inst
11704
      for key, val in self.op.hvparams.iteritems():
11705
        result.append(("hv/%s" % key, val))
11706

    
11707
    # beparams changes
11708
    if self.op.beparams:
11709
      instance.beparams = self.be_inst
11710
      for key, val in self.op.beparams.iteritems():
11711
        result.append(("be/%s" % key, val))
11712

    
11713
    # OS change
11714
    if self.op.os_name:
11715
      instance.os = self.op.os_name
11716

    
11717
    # osparams changes
11718
    if self.op.osparams:
11719
      instance.osparams = self.os_inst
11720
      for key, val in self.op.osparams.iteritems():
11721
        result.append(("os/%s" % key, val))
11722

    
11723
    # online/offline instance
11724
    if self.op.online_inst:
11725
      self.cfg.MarkInstanceDown(instance.name)
11726
      result.append(("admin_state", constants.ADMINST_DOWN))
11727
    if self.op.offline_inst:
11728
      self.cfg.MarkInstanceOffline(instance.name)
11729
      result.append(("admin_state", constants.ADMINST_OFFLINE))
11730

    
11731
    self.cfg.Update(instance, feedback_fn)
11732

    
11733
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11734
                self.owned_locks(locking.LEVEL_NODE)), \
11735
      "All node locks should have been released by now"
11736

    
11737
    return result
11738

    
11739
  _DISK_CONVERSIONS = {
11740
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11741
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11742
    }
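  # Dispatch note: Exec looks up the conversion helper via
  # self._DISK_CONVERSIONS[(instance.disk_template, self.op.disk_template)],
  # so only the plain<->drbd conversions listed above are supported.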
11743

    
11744

    
11745
class LUInstanceChangeGroup(LogicalUnit):
11746
  HPATH = "instance-change-group"
11747
  HTYPE = constants.HTYPE_INSTANCE
11748
  REQ_BGL = False
11749

    
11750
  def ExpandNames(self):
11751
    self.share_locks = _ShareAll()
11752
    self.needed_locks = {
11753
      locking.LEVEL_NODEGROUP: [],
11754
      locking.LEVEL_NODE: [],
11755
      }
11756

    
11757
    self._ExpandAndLockInstance()
11758

    
11759
    if self.op.target_groups:
11760
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11761
                                  self.op.target_groups)
11762
    else:
11763
      self.req_target_uuids = None
11764

    
11765
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11766

    
11767
  def DeclareLocks(self, level):
11768
    if level == locking.LEVEL_NODEGROUP:
11769
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11770

    
11771
      if self.req_target_uuids:
11772
        lock_groups = set(self.req_target_uuids)
11773

    
11774
        # Lock all groups used by instance optimistically; this requires going
11775
        # via the node before it's locked, requiring verification later on
11776
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11777
        lock_groups.update(instance_groups)
11778
      else:
11779
        # No target groups, need to lock all of them
11780
        lock_groups = locking.ALL_SET
11781

    
11782
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11783

    
11784
    elif level == locking.LEVEL_NODE:
11785
      if self.req_target_uuids:
11786
        # Lock all nodes used by instances
11787
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11788
        self._LockInstancesNodes()
11789

    
11790
        # Lock all nodes in all potential target groups
11791
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11792
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11793
        member_nodes = [node_name
11794
                        for group in lock_groups
11795
                        for node_name in self.cfg.GetNodeGroup(group).members]
11796
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11797
      else:
11798
        # Lock all nodes as all groups are potential targets
11799
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11800

    
11801
  def CheckPrereq(self):
11802
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11803
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11804
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11805

    
11806
    assert (self.req_target_uuids is None or
11807
            owned_groups.issuperset(self.req_target_uuids))
11808
    assert owned_instances == set([self.op.instance_name])
11809

    
11810
    # Get instance information
11811
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11812

    
11813
    # Check if node groups for locked instance are still correct
11814
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11815
      ("Instance %s's nodes changed while we kept the lock" %
11816
       self.op.instance_name)
11817

    
11818
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11819
                                           owned_groups)
11820

    
11821
    if self.req_target_uuids:
11822
      # User requested specific target groups
11823
      self.target_uuids = self.req_target_uuids
11824
    else:
11825
      # All groups except those used by the instance are potential targets
11826
      self.target_uuids = owned_groups - inst_groups
11827

    
11828
    conflicting_groups = self.target_uuids & inst_groups
11829
    if conflicting_groups:
11830
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11831
                                 " used by the instance '%s'" %
11832
                                 (utils.CommaJoin(conflicting_groups),
11833
                                  self.op.instance_name),
11834
                                 errors.ECODE_INVAL)
11835

    
11836
    if not self.target_uuids:
11837
      raise errors.OpPrereqError("There are no possible target groups",
11838
                                 errors.ECODE_INVAL)
11839

    
11840
  def BuildHooksEnv(self):
11841
    """Build hooks env.
11842

11843
    """
11844
    assert self.target_uuids
11845

    
11846
    env = {
11847
      "TARGET_GROUPS": " ".join(self.target_uuids),
11848
      }
11849

    
11850
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11851

    
11852
    return env
11853

    
11854
  def BuildHooksNodes(self):
11855
    """Build hooks nodes.
11856

11857
    """
11858
    mn = self.cfg.GetMasterNode()
11859
    return ([mn], [mn])
11860

    
11861
  def Exec(self, feedback_fn):
11862
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11863

    
11864
    assert instances == [self.op.instance_name], "Instance not locked"
11865

    
11866
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11867
                     instances=instances, target_groups=list(self.target_uuids))
11868

    
11869
    ial.Run(self.op.iallocator)
11870

    
11871
    if not ial.success:
11872
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11873
                                 " instance '%s' using iallocator '%s': %s" %
11874
                                 (self.op.instance_name, self.op.iallocator,
11875
                                  ial.info),
11876
                                 errors.ECODE_NORES)
11877

    
11878
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11879

    
11880
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11881
                 " instance '%s'", len(jobs), self.op.instance_name)
11882

    
11883
    return ResultWithJobs(jobs)
11884

    
11885

    
11886
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


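# The mapping returned by LUBackupQuery.Exec above uses node names as keys and
# either a list of export names or False (when the RPC to that node failed) as
# values; a purely hypothetical example:
#   {"node1.example.com": ["instance1.example.com"], "node2.example.com": False}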
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
11974
  """Export an instance to an image in the cluster.
11975

11976
  """
11977
  HPATH = "instance-export"
11978
  HTYPE = constants.HTYPE_INSTANCE
11979
  REQ_BGL = False
11980

    
11981
  def CheckArguments(self):
11982
    """Check the arguments.
11983

11984
    """
11985
    self.x509_key_name = self.op.x509_key_name
11986
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11987

    
11988
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11989
      if not self.x509_key_name:
11990
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11991
                                   errors.ECODE_INVAL)
11992

    
11993
      if not self.dest_x509_ca_pem:
11994
        raise errors.OpPrereqError("Missing destination X509 CA",
11995
                                   errors.ECODE_INVAL)
11996

    
11997
  def ExpandNames(self):
11998
    self._ExpandAndLockInstance()
11999

    
12000
    # Lock all nodes for local exports
12001
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12002
      # FIXME: lock only instance primary and destination node
12003
      #
12004
      # Sad but true, for now we have to lock all nodes, as we don't know where
12005
      # the previous export might be, and in this LU we search for it and
12006
      # remove it from its current node. In the future we could fix this by:
12007
      #  - making a tasklet to search (share-lock all), then create the
12008
      #    new one, then one to remove, after
12009
      #  - removing the removal operation altogether
12010
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12011

    
12012
  def DeclareLocks(self, level):
12013
    """Last minute lock declaration."""
12014
    # All nodes are locked anyway, so nothing to do here.
12015

    
12016
  def BuildHooksEnv(self):
12017
    """Build hooks env.
12018

12019
    This will run on the master, primary node and target node.
12020

12021
    """
12022
    env = {
12023
      "EXPORT_MODE": self.op.mode,
12024
      "EXPORT_NODE": self.op.target_node,
12025
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12026
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12027
      # TODO: Generic function for boolean env variables
12028
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12029
      }
12030

    
12031
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12032

    
12033
    return env
12034

    
12035
  def BuildHooksNodes(self):
12036
    """Build hooks nodes.
12037

12038
    """
12039
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12040

    
12041
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12042
      nl.append(self.op.target_node)
12043

    
12044
    return (nl, nl)
12045

    
12046
  def CheckPrereq(self):
12047
    """Check prerequisites.
12048

12049
    This checks that the instance and node names are valid.
12050

12051
    """
12052
    instance_name = self.op.instance_name
12053

    
12054
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12055
    assert self.instance is not None, \
12056
          "Cannot retrieve locked instance %s" % self.op.instance_name
12057
    _CheckNodeOnline(self, self.instance.primary_node)
12058

    
12059
    if (self.op.remove_instance and
12060
        self.instance.admin_state == constants.ADMINST_UP and
12061
        not self.op.shutdown):
12062
      raise errors.OpPrereqError("Can not remove instance without shutting it"
12063
                                 " down before")
12064

    
12065
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12066
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12067
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12068
      assert self.dst_node is not None
12069

    
12070
      _CheckNodeOnline(self, self.dst_node.name)
12071
      _CheckNodeNotDrained(self, self.dst_node.name)
12072

    
12073
      self._cds = None
12074
      self.dest_disk_info = None
12075
      self.dest_x509_ca = None
12076

    
12077
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12078
      self.dst_node = None
12079

    
12080
      if len(self.op.target_node) != len(self.instance.disks):
12081
        raise errors.OpPrereqError(("Received destination information for %s"
12082
                                    " disks, but instance %s has %s disks") %
12083
                                   (len(self.op.target_node), instance_name,
12084
                                    len(self.instance.disks)),
12085
                                   errors.ECODE_INVAL)
12086

    
12087
      cds = _GetClusterDomainSecret()
12088

    
12089
      # Check X509 key name
12090
      try:
12091
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12092
      except (TypeError, ValueError), err:
12093
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12094

    
12095
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12096
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12097
                                   errors.ECODE_INVAL)
12098

    
12099
      # Load and verify CA
12100
      try:
12101
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12102
      except OpenSSL.crypto.Error, err:
12103
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12104
                                   (err, ), errors.ECODE_INVAL)
12105

    
12106
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12107
      if errcode is not None:
12108
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12109
                                   (msg, ), errors.ECODE_INVAL)
12110

    
12111
      self.dest_x509_ca = cert
12112

    
12113
      # Verify target information
12114
      disk_info = []
12115
      for idx, disk_data in enumerate(self.op.target_node):
12116
        try:
12117
          (host, port, magic) = \
12118
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12119
        except errors.GenericError, err:
12120
          raise errors.OpPrereqError("Target info for disk %s: %s" %
12121
                                     (idx, err), errors.ECODE_INVAL)
12122

    
12123
        disk_info.append((host, port, magic))
12124

    
12125
      assert len(disk_info) == len(self.op.target_node)
12126
      self.dest_disk_info = disk_info
12127

    
12128
    else:
12129
      raise errors.ProgrammerError("Unhandled export mode %r" %
12130
                                   self.op.mode)
12131

    
12132
    # instance disk type verification
12133
    # TODO: Implement export support for file-based disks
12134
    for disk in self.instance.disks:
12135
      if disk.dev_type == constants.LD_FILE:
12136
        raise errors.OpPrereqError("Export not supported for instances with"
12137
                                   " file-based disks", errors.ECODE_INVAL)
12138

    
12139
  def _CleanupExports(self, feedback_fn):
12140
    """Removes exports of current instance from all other nodes.
12141

12142
    If an instance in a cluster with nodes A..D was exported to node C, its
12143
    exports will be removed from the nodes A, B and D.
12144

12145
    """
12146
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
12147

    
12148
    nodelist = self.cfg.GetNodeList()
12149
    nodelist.remove(self.dst_node.name)
12150

    
12151
    # on one-node clusters nodelist will be empty after the removal
12152
    # if we proceed, the backup would be removed because OpBackupQuery
12153
    # substitutes an empty list with the full cluster node list.
12154
    iname = self.instance.name
12155
    if nodelist:
12156
      feedback_fn("Removing old exports for instance %s" % iname)
12157
      exportlist = self.rpc.call_export_list(nodelist)
12158
      for node in exportlist:
12159
        if exportlist[node].fail_msg:
12160
          continue
12161
        if iname in exportlist[node].payload:
12162
          msg = self.rpc.call_export_remove(node, iname).fail_msg
12163
          if msg:
12164
            self.LogWarning("Could not remove older export for instance %s"
12165
                            " on node %s: %s", iname, node, msg)
12166

    
12167
  def Exec(self, feedback_fn):
12168
    """Export an instance to an image in the cluster.
12169

12170
    """
12171
    assert self.op.mode in constants.EXPORT_MODES
12172

    
12173
    instance = self.instance
12174
    src_node = instance.primary_node
12175

    
12176
    if self.op.shutdown:
12177
      # shut down the instance, but not the disks
12178
      feedback_fn("Shutting down instance %s" % instance.name)
12179
      result = self.rpc.call_instance_shutdown(src_node, instance,
12180
                                               self.op.shutdown_timeout)
12181
      # TODO: Maybe ignore failures if ignore_remove_failures is set
12182
      result.Raise("Could not shutdown instance %s on"
12183
                   " node %s" % (instance.name, src_node))
12184

    
12185
    # set the disks ID correctly since call_instance_start needs the
12186
    # correct drbd minor to create the symlinks
12187
    for disk in instance.disks:
12188
      self.cfg.SetDiskID(disk, src_node)
12189

    
12190
    activate_disks = (instance.admin_state != constants.ADMINST_UP)
12191

    
12192
    if activate_disks:
12193
      # Activate the instance disks if we're exporting a stopped instance
12194
      feedback_fn("Activating disks for %s" % instance.name)
12195
      _StartInstanceDisks(self, instance, None)
12196

    
12197
    try:
12198
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12199
                                                     instance)
12200

    
12201
      helper.CreateSnapshots()
12202
      try:
12203
        if (self.op.shutdown and
12204
            instance.admin_state == constants.ADMINST_UP and
12205
            not self.op.remove_instance):
12206
          assert not activate_disks
12207
          feedback_fn("Starting instance %s" % instance.name)
12208
          result = self.rpc.call_instance_start(src_node,
12209
                                                (instance, None, None), False)
12210
          msg = result.fail_msg
12211
          if msg:
12212
            feedback_fn("Failed to start instance: %s" % msg)
12213
            _ShutdownInstanceDisks(self, instance)
12214
            raise errors.OpExecError("Could not start instance: %s" % msg)
12215

    
12216
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
12217
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12218
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12219
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12220
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12221

    
12222
          (key_name, _, _) = self.x509_key_name
12223

    
12224
          dest_ca_pem = \
12225
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12226
                                            self.dest_x509_ca)
12227

    
12228
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12229
                                                     key_name, dest_ca_pem,
12230
                                                     timeouts)
12231
      finally:
12232
        helper.Cleanup()
12233

    
12234
      # Check for backwards compatibility
12235
      assert len(dresults) == len(instance.disks)
12236
      assert compat.all(isinstance(i, bool) for i in dresults), \
12237
             "Not all results are boolean: %r" % dresults
12238

    
12239
    finally:
12240
      if activate_disks:
12241
        feedback_fn("Deactivating disks for %s" % instance.name)
12242
        _ShutdownInstanceDisks(self, instance)
12243

    
12244
    if not (compat.all(dresults) and fin_resu):
12245
      failures = []
12246
      if not fin_resu:
12247
        failures.append("export finalization")
12248
      if not compat.all(dresults):
12249
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12250
                               if not dsk)
12251
        failures.append("disk export: disk(s) %s" % fdsk)
12252

    
12253
      raise errors.OpExecError("Export failed, errors in %s" %
12254
                               utils.CommaJoin(failures))
12255

    
12256
    # At this point, the export was successful, we can cleanup/finish
12257

    
12258
    # Remove instance if requested
12259
    if self.op.remove_instance:
12260
      feedback_fn("Removing instance %s" % instance.name)
12261
      _RemoveInstance(self, feedback_fn, instance,
12262
                      self.op.ignore_remove_failures)
12263

    
12264
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12265
      self._CleanupExports(feedback_fn)
12266

    
12267
    return fin_resu, dresults
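
# For illustration, a fully successful export of a two-disk instance returns
# (True, [True, True]) from Exec above; partial failures never reach the
# return statement because they raise OpExecError first.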
12268

    
12269

    
12270
class LUBackupRemove(NoHooksLU):
12271
  """Remove exports related to the named instance.
12272

12273
  """
12274
  REQ_BGL = False
12275

    
12276
  def ExpandNames(self):
12277
    self.needed_locks = {}
12278
    # We need all nodes to be locked in order for RemoveExport to work, but we
12279
    # don't need to lock the instance itself, as nothing will happen to it (and
12280
    # we can remove exports also for a removed instance)
12281
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12282

    
12283
  def Exec(self, feedback_fn):
12284
    """Remove any export.
12285

12286
    """
12287
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12288
    # If the instance was not found we'll try with the name that was passed in.
12289
    # This will only work if it was an FQDN, though.
12290
    fqdn_warn = False
12291
    if not instance_name:
12292
      fqdn_warn = True
12293
      instance_name = self.op.instance_name
12294

    
12295
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12296
    exportlist = self.rpc.call_export_list(locked_nodes)
12297
    found = False
12298
    for node in exportlist:
12299
      msg = exportlist[node].fail_msg
12300
      if msg:
12301
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12302
        continue
12303
      if instance_name in exportlist[node].payload:
12304
        found = True
12305
        result = self.rpc.call_export_remove(node, instance_name)
12306
        msg = result.fail_msg
12307
        if msg:
12308
          logging.error("Could not remove export for instance %s"
12309
                        " on node %s: %s", instance_name, node, msg)
12310

    
12311
    if fqdn_warn and not found:
12312
      feedback_fn("Export not found. If trying to remove an export belonging"
12313
                  " to a deleted instance please use its Fully Qualified"
12314
                  " Domain Name.")
12315

    
12316

    
12317
class LUGroupAdd(LogicalUnit):
12318
  """Logical unit for creating node groups.
12319

12320
  """
12321
  HPATH = "group-add"
12322
  HTYPE = constants.HTYPE_GROUP
12323
  REQ_BGL = False
12324

    
12325
  def ExpandNames(self):
12326
    # We need the new group's UUID here so that we can create and acquire the
12327
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12328
    # that it should not check whether the UUID exists in the configuration.
12329
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12330
    self.needed_locks = {}
12331
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12332

    
12333
  def CheckPrereq(self):
12334
    """Check prerequisites.
12335

12336
    This checks that the given group name is not an existing node group
12337
    already.
12338

12339
    """
12340
    try:
12341
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12342
    except errors.OpPrereqError:
12343
      pass
12344
    else:
12345
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12346
                                 " node group (UUID: %s)" %
12347
                                 (self.op.group_name, existing_uuid),
12348
                                 errors.ECODE_EXISTS)
12349

    
12350
    if self.op.ndparams:
12351
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12352

    
12353
  def BuildHooksEnv(self):
12354
    """Build hooks env.
12355

12356
    """
12357
    return {
12358
      "GROUP_NAME": self.op.group_name,
12359
      }
12360

    
12361
  def BuildHooksNodes(self):
12362
    """Build hooks nodes.
12363

12364
    """
12365
    mn = self.cfg.GetMasterNode()
12366
    return ([mn], [mn])
12367

    
12368
  def Exec(self, feedback_fn):
12369
    """Add the node group to the cluster.
12370

12371
    """
12372
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12373
                                  uuid=self.group_uuid,
12374
                                  alloc_policy=self.op.alloc_policy,
12375
                                  ndparams=self.op.ndparams)
12376

    
12377
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12378
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12379

    
12380

    
12381
class LUGroupAssignNodes(NoHooksLU):
12382
  """Logical unit for assigning nodes to groups.
12383

12384
  """
12385
  REQ_BGL = False
12386

    
12387
  def ExpandNames(self):
12388
    # These raise errors.OpPrereqError on their own:
12389
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12390
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12391

    
12392
    # We want to lock all the affected nodes and groups. We have readily
12393
    # available the list of nodes, and the *destination* group. To gather the
12394
    # list of "source" groups, we need to fetch node information later on.
12395
    self.needed_locks = {
12396
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12397
      locking.LEVEL_NODE: self.op.nodes,
12398
      }
12399

    
12400
  def DeclareLocks(self, level):
12401
    if level == locking.LEVEL_NODEGROUP:
12402
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12403

    
12404
      # Try to get all affected nodes' groups without having the group or node
12405
      # lock yet. Needs verification later in the code flow.
12406
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12407

    
12408
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12409

    
12410
  def CheckPrereq(self):
12411
    """Check prerequisites.
12412

12413
    """
12414
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12415
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12416
            frozenset(self.op.nodes))
12417

    
12418
    expected_locks = (set([self.group_uuid]) |
12419
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12420
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12421
    if actual_locks != expected_locks:
12422
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12423
                               " current groups are '%s', used to be '%s'" %
12424
                               (utils.CommaJoin(expected_locks),
12425
                                utils.CommaJoin(actual_locks)))
12426

    
12427
    self.node_data = self.cfg.GetAllNodesInfo()
12428
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12429
    instance_data = self.cfg.GetAllInstancesInfo()
12430

    
12431
    if self.group is None:
12432
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12433
                               (self.op.group_name, self.group_uuid))
12434

    
12435
    (new_splits, previous_splits) = \
12436
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12437
                                             for node in self.op.nodes],
12438
                                            self.node_data, instance_data)
12439

    
12440
    if new_splits:
12441
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12442

    
12443
      if not self.op.force:
12444
        raise errors.OpExecError("The following instances get split by this"
12445
                                 " change and --force was not given: %s" %
12446
                                 fmt_new_splits)
12447
      else:
12448
        self.LogWarning("This operation will split the following instances: %s",
12449
                        fmt_new_splits)
12450

    
12451
        if previous_splits:
12452
          self.LogWarning("In addition, these already-split instances continue"
12453
                          " to be split across groups: %s",
12454
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12455

    
12456
  def Exec(self, feedback_fn):
12457
    """Assign nodes to a new group.
12458

12459
    """
12460
    for node in self.op.nodes:
12461
      self.node_data[node].group = self.group_uuid
12462

    
12463
    # FIXME: Depends on side-effects of modifying the result of
12464
    # C{cfg.GetAllNodesInfo}
12465

    
12466
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12467

    
12468
  @staticmethod
12469
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12470
    """Check for split instances after a node assignment.
12471

12472
    This method considers a series of node assignments as an atomic operation,
12473
    and returns information about split instances after applying the set of
12474
    changes.
12475

12476
    In particular, it returns information about newly split instances, and
12477
    instances that were already split, and remain so after the change.
12478

12479
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12480
    considered.
12481

12482
    @type changes: list of (node_name, new_group_uuid) pairs.
12483
    @param changes: list of node assignments to consider.
12484
    @param node_data: a dict with data for all nodes
12485
    @param instance_data: a dict with all instances to consider
12486
    @rtype: a two-tuple
12487
    @return: a list of instances that were previously okay and end up split as a
12488
      consequence of this change, and a list of instances that were previously
12489
      split and that this change does not fix.
12490

12491
    """
12492
    changed_nodes = dict((node, group) for node, group in changes
12493
                         if node_data[node].group != group)
12494

    
12495
    all_split_instances = set()
12496
    previously_split_instances = set()
12497

    
12498
    def InstanceNodes(instance):
12499
      return [instance.primary_node] + list(instance.secondary_nodes)
12500

    
12501
    for inst in instance_data.values():
12502
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12503
        continue
12504

    
12505
      instance_nodes = InstanceNodes(inst)
12506

    
12507
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12508
        previously_split_instances.add(inst.name)
12509

    
12510
      if len(set(changed_nodes.get(node, node_data[node].group)
12511
                 for node in instance_nodes)) > 1:
12512
        all_split_instances.add(inst.name)
12513

    
12514
    return (list(all_split_instances - previously_split_instances),
12515
            list(previously_split_instances & all_split_instances))
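
# Usage sketch for the split-instance check above (all names hypothetical):
#
#   changes = [("node1", "uuid-of-group-B")]
#   (new_splits, old_splits) = \
#     LUGroupAssignNodes.CheckAssignmentForSplitInstances(changes, node_data,
#                                                         instance_data)
#
# A DRBD instance with primary node1 and secondary node2, both currently in
# group A, would appear in new_splits; an instance that already spans two
# groups and still does so after the change would appear in old_splits.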
12516

    
12517

    
12518
class _GroupQuery(_QueryBase):
12519
  FIELDS = query.GROUP_FIELDS
12520

    
12521
  def ExpandNames(self, lu):
12522
    lu.needed_locks = {}
12523

    
12524
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12525
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12526

    
12527
    if not self.names:
12528
      self.wanted = [name_to_uuid[name]
12529
                     for name in utils.NiceSort(name_to_uuid.keys())]
12530
    else:
12531
      # Accept names to be either names or UUIDs.
12532
      missing = []
12533
      self.wanted = []
12534
      all_uuid = frozenset(self._all_groups.keys())
12535

    
12536
      for name in self.names:
12537
        if name in all_uuid:
12538
          self.wanted.append(name)
12539
        elif name in name_to_uuid:
12540
          self.wanted.append(name_to_uuid[name])
12541
        else:
12542
          missing.append(name)
12543

    
12544
      if missing:
12545
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12546
                                   utils.CommaJoin(missing),
12547
                                   errors.ECODE_NOENT)
12548

    
12549
  def DeclareLocks(self, lu, level):
12550
    pass
12551

    
12552
  def _GetQueryData(self, lu):
12553
    """Computes the list of node groups and their attributes.
12554

12555
    """
12556
    do_nodes = query.GQ_NODE in self.requested_data
12557
    do_instances = query.GQ_INST in self.requested_data
12558

    
12559
    group_to_nodes = None
12560
    group_to_instances = None
12561

    
12562
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12563
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12564
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12565
    # instance->node. Hence, we will need to process nodes even if we only need
12566
    # instance information.
12567
    if do_nodes or do_instances:
12568
      all_nodes = lu.cfg.GetAllNodesInfo()
12569
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12570
      node_to_group = {}
12571

    
12572
      for node in all_nodes.values():
12573
        if node.group in group_to_nodes:
12574
          group_to_nodes[node.group].append(node.name)
12575
          node_to_group[node.name] = node.group
12576

    
12577
      if do_instances:
12578
        all_instances = lu.cfg.GetAllInstancesInfo()
12579
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12580

    
12581
        for instance in all_instances.values():
12582
          node = instance.primary_node
12583
          if node in node_to_group:
12584
            group_to_instances[node_to_group[node]].append(instance.name)
12585

    
12586
        if not do_nodes:
12587
          # Do not pass on node information if it was not requested.
12588
          group_to_nodes = None
12589

    
12590
    return query.GroupQueryData([self._all_groups[uuid]
12591
                                 for uuid in self.wanted],
12592
                                group_to_nodes, group_to_instances)
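
# For illustration, with hypothetical UUIDs and names the two mappings built
# above look like
#
#   group_to_nodes     == {"uuid-1": ["node1", "node2"], "uuid-2": ["node3"]}
#   group_to_instances == {"uuid-1": ["inst1.example.com"], "uuid-2": []}
#
# group_to_nodes is reset to None when node data was not requested, and both
# stay None when neither GQ_NODE nor GQ_INST was asked for.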
12593

    
12594

    
12595
class LUGroupQuery(NoHooksLU):
12596
  """Logical unit for querying node groups.
12597

12598
  """
12599
  REQ_BGL = False
12600

    
12601
  def CheckArguments(self):
12602
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12603
                          self.op.output_fields, False)
12604

    
12605
  def ExpandNames(self):
12606
    self.gq.ExpandNames(self)
12607

    
12608
  def DeclareLocks(self, level):
12609
    self.gq.DeclareLocks(self, level)
12610

    
12611
  def Exec(self, feedback_fn):
12612
    return self.gq.OldStyleQuery(self)
12613

    
12614

    
12615
class LUGroupSetParams(LogicalUnit):
12616
  """Modifies the parameters of a node group.
12617

12618
  """
12619
  HPATH = "group-modify"
12620
  HTYPE = constants.HTYPE_GROUP
12621
  REQ_BGL = False
12622

    
12623
  def CheckArguments(self):
12624
    all_changes = [
12625
      self.op.ndparams,
12626
      self.op.alloc_policy,
12627
      ]
12628

    
12629
    if all_changes.count(None) == len(all_changes):
12630
      raise errors.OpPrereqError("Please pass at least one modification",
12631
                                 errors.ECODE_INVAL)
12632

    
12633
  def ExpandNames(self):
12634
    # This raises errors.OpPrereqError on its own:
12635
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12636

    
12637
    self.needed_locks = {
12638
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12639
      }
12640

    
12641
  def CheckPrereq(self):
12642
    """Check prerequisites.
12643

12644
    """
12645
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12646

    
12647
    if self.group is None:
12648
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12649
                               (self.op.group_name, self.group_uuid))
12650

    
12651
    if self.op.ndparams:
12652
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12653
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12654
      self.new_ndparams = new_ndparams
12655

    
12656
  def BuildHooksEnv(self):
12657
    """Build hooks env.
12658

12659
    """
12660
    return {
12661
      "GROUP_NAME": self.op.group_name,
12662
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12663
      }
12664

    
12665
  def BuildHooksNodes(self):
12666
    """Build hooks nodes.
12667

12668
    """
12669
    mn = self.cfg.GetMasterNode()
12670
    return ([mn], [mn])
12671

    
12672
  def Exec(self, feedback_fn):
12673
    """Modifies the node group.
12674

12675
    """
12676
    result = []
12677

    
12678
    if self.op.ndparams:
12679
      self.group.ndparams = self.new_ndparams
12680
      result.append(("ndparams", str(self.group.ndparams)))
12681

    
12682
    if self.op.alloc_policy:
12683
      self.group.alloc_policy = self.op.alloc_policy
12684

    
12685
    self.cfg.Update(self.group, feedback_fn)
12686
    return result
12687

    
12688

    
12689
class LUGroupRemove(LogicalUnit):
12690
  HPATH = "group-remove"
12691
  HTYPE = constants.HTYPE_GROUP
12692
  REQ_BGL = False
12693

    
12694
  def ExpandNames(self):
12695
    # This will raise errors.OpPrereqError on its own:
12696
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12697
    self.needed_locks = {
12698
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12699
      }
12700

    
12701
  def CheckPrereq(self):
12702
    """Check prerequisites.
12703

12704
    This checks that the given group name exists as a node group, that it is
12705
    empty (i.e., contains no nodes), and that it is not the last group of the
12706
    cluster.
12707

12708
    """
12709
    # Verify that the group is empty.
12710
    group_nodes = [node.name
12711
                   for node in self.cfg.GetAllNodesInfo().values()
12712
                   if node.group == self.group_uuid]
12713

    
12714
    if group_nodes:
12715
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12716
                                 " nodes: %s" %
12717
                                 (self.op.group_name,
12718
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12719
                                 errors.ECODE_STATE)
12720

    
12721
    # Verify the cluster would not be left group-less.
12722
    if len(self.cfg.GetNodeGroupList()) == 1:
12723
      raise errors.OpPrereqError("Group '%s' is the only group,"
12724
                                 " cannot be removed" %
12725
                                 self.op.group_name,
12726
                                 errors.ECODE_STATE)
12727

    
12728
  def BuildHooksEnv(self):
12729
    """Build hooks env.
12730

12731
    """
12732
    return {
12733
      "GROUP_NAME": self.op.group_name,
12734
      }
12735

    
12736
  def BuildHooksNodes(self):
12737
    """Build hooks nodes.
12738

12739
    """
12740
    mn = self.cfg.GetMasterNode()
12741
    return ([mn], [mn])
12742

    
12743
  def Exec(self, feedback_fn):
12744
    """Remove the node group.
12745

12746
    """
12747
    try:
12748
      self.cfg.RemoveNodeGroup(self.group_uuid)
12749
    except errors.ConfigurationError:
12750
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12751
                               (self.op.group_name, self.group_uuid))
12752

    
12753
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12754

    
12755

    
12756
class LUGroupRename(LogicalUnit):
12757
  HPATH = "group-rename"
12758
  HTYPE = constants.HTYPE_GROUP
12759
  REQ_BGL = False
12760

    
12761
  def ExpandNames(self):
12762
    # This raises errors.OpPrereqError on its own:
12763
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12764

    
12765
    self.needed_locks = {
12766
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12767
      }
12768

    
12769
  def CheckPrereq(self):
12770
    """Check prerequisites.
12771

12772
    Ensures requested new name is not yet used.
12773

12774
    """
12775
    try:
12776
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12777
    except errors.OpPrereqError:
12778
      pass
12779
    else:
12780
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12781
                                 " node group (UUID: %s)" %
12782
                                 (self.op.new_name, new_name_uuid),
12783
                                 errors.ECODE_EXISTS)
12784

    
12785
  def BuildHooksEnv(self):
12786
    """Build hooks env.
12787

12788
    """
12789
    return {
12790
      "OLD_NAME": self.op.group_name,
12791
      "NEW_NAME": self.op.new_name,
12792
      }
12793

    
12794
  def BuildHooksNodes(self):
12795
    """Build hooks nodes.
12796

12797
    """
12798
    mn = self.cfg.GetMasterNode()
12799

    
12800
    all_nodes = self.cfg.GetAllNodesInfo()
12801
    all_nodes.pop(mn, None)
12802

    
12803
    run_nodes = [mn]
12804
    run_nodes.extend(node.name for node in all_nodes.values()
12805
                     if node.group == self.group_uuid)
12806

    
12807
    return (run_nodes, run_nodes)
12808

    
12809
  def Exec(self, feedback_fn):
12810
    """Rename the node group.
12811

12812
    """
12813
    group = self.cfg.GetNodeGroup(self.group_uuid)
12814

    
12815
    if group is None:
12816
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12817
                               (self.op.group_name, self.group_uuid))
12818

    
12819
    group.name = self.op.new_name
12820
    self.cfg.Update(group, feedback_fn)
12821

    
12822
    return self.op.new_name
12823

    
12824

    
12825
class LUGroupEvacuate(LogicalUnit):
12826
  HPATH = "group-evacuate"
12827
  HTYPE = constants.HTYPE_GROUP
12828
  REQ_BGL = False
12829

    
12830
  def ExpandNames(self):
12831
    # This raises errors.OpPrereqError on its own:
12832
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12833

    
12834
    if self.op.target_groups:
12835
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12836
                                  self.op.target_groups)
12837
    else:
12838
      self.req_target_uuids = []
12839

    
12840
    if self.group_uuid in self.req_target_uuids:
12841
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12842
                                 " as a target group (targets are %s)" %
12843
                                 (self.group_uuid,
12844
                                  utils.CommaJoin(self.req_target_uuids)),
12845
                                 errors.ECODE_INVAL)
12846

    
12847
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12848

    
12849
    self.share_locks = _ShareAll()
12850
    self.needed_locks = {
12851
      locking.LEVEL_INSTANCE: [],
12852
      locking.LEVEL_NODEGROUP: [],
12853
      locking.LEVEL_NODE: [],
12854
      }
12855

    
12856
  def DeclareLocks(self, level):
12857
    if level == locking.LEVEL_INSTANCE:
12858
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12859

    
12860
      # Lock instances optimistically, needs verification once node and group
12861
      # locks have been acquired
12862
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12863
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12864

    
12865
    elif level == locking.LEVEL_NODEGROUP:
12866
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12867

    
12868
      if self.req_target_uuids:
12869
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12870

    
12871
        # Lock all groups used by instances optimistically; this requires going
12872
        # via the node before it's locked, requiring verification later on
12873
        lock_groups.update(group_uuid
12874
                           for instance_name in
12875
                             self.owned_locks(locking.LEVEL_INSTANCE)
12876
                           for group_uuid in
12877
                             self.cfg.GetInstanceNodeGroups(instance_name))
12878
      else:
12879
        # No target groups, need to lock all of them
12880
        lock_groups = locking.ALL_SET
12881

    
12882
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12883

    
12884
    elif level == locking.LEVEL_NODE:
12885
      # This will only lock the nodes in the group to be evacuated which
12886
      # contain actual instances
12887
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12888
      self._LockInstancesNodes()
12889

    
12890
      # Lock all nodes in group to be evacuated and target groups
12891
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12892
      assert self.group_uuid in owned_groups
12893
      member_nodes = [node_name
12894
                      for group in owned_groups
12895
                      for node_name in self.cfg.GetNodeGroup(group).members]
12896
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12897

    
12898
  def CheckPrereq(self):
12899
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12900
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12901
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12902

    
12903
    assert owned_groups.issuperset(self.req_target_uuids)
12904
    assert self.group_uuid in owned_groups
12905

    
12906
    # Check if locked instances are still correct
12907
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12908

    
12909
    # Get instance information
12910
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12911

    
12912
    # Check if node groups for locked instances are still correct
12913
    for instance_name in owned_instances:
12914
      inst = self.instances[instance_name]
12915
      assert owned_nodes.issuperset(inst.all_nodes), \
12916
        "Instance %s's nodes changed while we kept the lock" % instance_name
12917

    
12918
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12919
                                             owned_groups)
12920

    
12921
      assert self.group_uuid in inst_groups, \
12922
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12923

    
12924
    if self.req_target_uuids:
12925
      # User requested specific target groups
12926
      self.target_uuids = self.req_target_uuids
12927
    else:
12928
      # All groups except the one to be evacuated are potential targets
12929
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12930
                           if group_uuid != self.group_uuid]
12931

    
12932
      if not self.target_uuids:
12933
        raise errors.OpPrereqError("There are no possible target groups",
12934
                                   errors.ECODE_INVAL)
12935

    
12936
  def BuildHooksEnv(self):
12937
    """Build hooks env.
12938

12939
    """
12940
    return {
12941
      "GROUP_NAME": self.op.group_name,
12942
      "TARGET_GROUPS": " ".join(self.target_uuids),
12943
      }
12944

    
12945
  def BuildHooksNodes(self):
12946
    """Build hooks nodes.
12947

12948
    """
12949
    mn = self.cfg.GetMasterNode()
12950

    
12951
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12952

    
12953
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12954

    
12955
    return (run_nodes, run_nodes)
12956

    
12957
  def Exec(self, feedback_fn):
12958
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12959

    
12960
    assert self.group_uuid not in self.target_uuids
12961

    
12962
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12963
                     instances=instances, target_groups=self.target_uuids)
12964

    
12965
    ial.Run(self.op.iallocator)
12966

    
12967
    if not ial.success:
12968
      raise errors.OpPrereqError("Can't compute group evacuation using"
12969
                                 " iallocator '%s': %s" %
12970
                                 (self.op.iallocator, ial.info),
12971
                                 errors.ECODE_NORES)
12972

    
12973
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12974

    
12975
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12976
                 len(jobs), self.op.group_name)
12977

    
12978
    return ResultWithJobs(jobs)
12979

    
12980

    
12981
class TagsLU(NoHooksLU): # pylint: disable=W0223
12982
  """Generic tags LU.
12983

12984
  This is an abstract class which is the parent of all the other tags LUs.
12985

12986
  """
12987
  def ExpandNames(self):
12988
    self.group_uuid = None
12989
    self.needed_locks = {}
12990
    if self.op.kind == constants.TAG_NODE:
12991
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12992
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12993
    elif self.op.kind == constants.TAG_INSTANCE:
12994
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12995
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12996
    elif self.op.kind == constants.TAG_NODEGROUP:
12997
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12998

    
12999
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13000
    # not possible to acquire the BGL based on opcode parameters)
13001

    
13002
  def CheckPrereq(self):
13003
    """Check prerequisites.
13004

13005
    """
13006
    if self.op.kind == constants.TAG_CLUSTER:
13007
      self.target = self.cfg.GetClusterInfo()
13008
    elif self.op.kind == constants.TAG_NODE:
13009
      self.target = self.cfg.GetNodeInfo(self.op.name)
13010
    elif self.op.kind == constants.TAG_INSTANCE:
13011
      self.target = self.cfg.GetInstanceInfo(self.op.name)
13012
    elif self.op.kind == constants.TAG_NODEGROUP:
13013
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
13014
    else:
13015
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13016
                                 str(self.op.kind), errors.ECODE_INVAL)
13017

    
13018

    
13019
class LUTagsGet(TagsLU):
13020
  """Returns the tags of a given object.
13021

13022
  """
13023
  REQ_BGL = False
13024

    
13025
  def ExpandNames(self):
13026
    TagsLU.ExpandNames(self)
13027

    
13028
    # Share locks as this is only a read operation
13029
    self.share_locks = _ShareAll()
13030

    
13031
  def Exec(self, feedback_fn):
13032
    """Returns the tag list.
13033

13034
    """
13035
    return list(self.target.GetTags())
13036

    
13037

    
13038
class LUTagsSearch(NoHooksLU):
13039
  """Searches the tags for a given pattern.
13040

13041
  """
13042
  REQ_BGL = False
13043

    
13044
  def ExpandNames(self):
13045
    self.needed_locks = {}
13046

    
13047
  def CheckPrereq(self):
13048
    """Check prerequisites.
13049

13050
    This checks the pattern passed for validity by compiling it.
13051

13052
    """
13053
    try:
13054
      self.re = re.compile(self.op.pattern)
13055
    except re.error, err:
13056
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13057
                                 (self.op.pattern, err), errors.ECODE_INVAL)
13058

    
13059
  def Exec(self, feedback_fn):
13060
    """Returns the tag list.
13061

13062
    """
13063
    cfg = self.cfg
13064
    tgts = [("/cluster", cfg.GetClusterInfo())]
13065
    ilist = cfg.GetAllInstancesInfo().values()
13066
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13067
    nlist = cfg.GetAllNodesInfo().values()
13068
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13069
    tgts.extend(("/nodegroup/%s" % n.name, n)
13070
                for n in cfg.GetAllNodeGroupsInfo().values())
13071
    results = []
13072
    for path, target in tgts:
13073
      for tag in target.GetTags():
13074
        if self.re.search(tag):
13075
          results.append((path, tag))
13076
    return results
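
# Example result shape (tags are hypothetical): searching for the pattern
# "^web" could return
#
#   [("/cluster", "webfarm"), ("/instances/inst1.example.com", "webserver")]
#
# i.e. a list of (path, tag) pairs covering the cluster, instances, nodes and
# node groups.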
13077

    
13078

    
13079
class LUTagsSet(TagsLU):
13080
  """Sets a tag on a given object.
13081

13082
  """
13083
  REQ_BGL = False
13084

    
13085
  def CheckPrereq(self):
13086
    """Check prerequisites.
13087

13088
    This checks the type and length of the tag name and value.
13089

13090
    """
13091
    TagsLU.CheckPrereq(self)
13092
    for tag in self.op.tags:
13093
      objects.TaggableObject.ValidateTag(tag)
13094

    
13095
  def Exec(self, feedback_fn):
13096
    """Sets the tag.
13097

13098
    """
13099
    try:
13100
      for tag in self.op.tags:
13101
        self.target.AddTag(tag)
13102
    except errors.TagError, err:
13103
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
13104
    self.cfg.Update(self.target, feedback_fn)
13105

    
13106

    
13107
class LUTagsDel(TagsLU):
13108
  """Delete a list of tags from a given object.
13109

13110
  """
13111
  REQ_BGL = False
13112

    
13113
  def CheckPrereq(self):
13114
    """Check prerequisites.
13115

13116
    This checks that we have the given tag.
13117

13118
    """
13119
    TagsLU.CheckPrereq(self)
13120
    for tag in self.op.tags:
13121
      objects.TaggableObject.ValidateTag(tag)
13122
    del_tags = frozenset(self.op.tags)
13123
    cur_tags = self.target.GetTags()
13124

    
13125
    diff_tags = del_tags - cur_tags
13126
    if diff_tags:
13127
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13128
      raise errors.OpPrereqError("Tag(s) %s not found" %
13129
                                 (utils.CommaJoin(diff_names), ),
13130
                                 errors.ECODE_NOENT)
13131

    
13132
  def Exec(self, feedback_fn):
13133
    """Remove the tag from the object.
13134

13135
    """
13136
    for tag in self.op.tags:
13137
      self.target.RemoveTag(tag)
13138
    self.cfg.Update(self.target, feedback_fn)
13139

    
13140

    
13141
class LUTestDelay(NoHooksLU):
13142
  """Sleep for a specified amount of time.
13143

13144
  This LU sleeps on the master and/or nodes for a specified amount of
13145
  time.
13146

13147
  """
13148
  REQ_BGL = False
13149

    
13150
  def ExpandNames(self):
13151
    """Expand names and set required locks.
13152

13153
    This expands the node list, if any.
13154

13155
    """
13156
    self.needed_locks = {}
13157
    if self.op.on_nodes:
13158
      # _GetWantedNodes can be used here, but is not always appropriate to use
13159
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13160
      # more information.
13161
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13162
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13163

    
13164
  def _TestDelay(self):
13165
    """Do the actual sleep.
13166

13167
    """
13168
    if self.op.on_master:
13169
      if not utils.TestDelay(self.op.duration):
13170
        raise errors.OpExecError("Error during master delay test")
13171
    if self.op.on_nodes:
13172
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13173
      for node, node_result in result.items():
13174
        node_result.Raise("Failure during rpc call to node %s" % node)
13175

    
13176
  def Exec(self, feedback_fn):
13177
    """Execute the test delay opcode, with the wanted repetitions.
13178

13179
    """
13180
    if self.op.repeat == 0:
13181
      self._TestDelay()
13182
    else:
13183
      top_value = self.op.repeat - 1
13184
      for i in range(self.op.repeat):
13185
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13186
        self._TestDelay()
13187

    
13188

    
13189
class LUTestJqueue(NoHooksLU):
13190
  """Utility LU to test some aspects of the job queue.
13191

13192
  """
13193
  REQ_BGL = False
13194

    
13195
  # Must be lower than default timeout for WaitForJobChange to see whether it
13196
  # notices changed jobs
13197
  _CLIENT_CONNECT_TIMEOUT = 20.0
13198
  _CLIENT_CONFIRM_TIMEOUT = 60.0
13199

    
13200
  @classmethod
13201
  def _NotifyUsingSocket(cls, cb, errcls):
13202
    """Opens a Unix socket and waits for another program to connect.
13203

13204
    @type cb: callable
13205
    @param cb: Callback to send socket name to client
13206
    @type errcls: class
13207
    @param errcls: Exception class to use for errors
13208

13209
    """
13210
    # Using a temporary directory as there's no easy way to create temporary
13211
    # sockets without writing a custom loop around tempfile.mktemp and
13212
    # socket.bind
13213
    tmpdir = tempfile.mkdtemp()
13214
    try:
13215
      tmpsock = utils.PathJoin(tmpdir, "sock")
13216

    
13217
      logging.debug("Creating temporary socket at %s", tmpsock)
13218
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13219
      try:
13220
        sock.bind(tmpsock)
13221
        sock.listen(1)
13222

    
13223
        # Send details to client
13224
        cb(tmpsock)
13225

    
13226
        # Wait for client to connect before continuing
13227
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13228
        try:
13229
          (conn, _) = sock.accept()
13230
        except socket.error, err:
13231
          raise errcls("Client didn't connect in time (%s)" % err)
13232
      finally:
13233
        sock.close()
13234
    finally:
13235
      # Remove as soon as client is connected
13236
      shutil.rmtree(tmpdir)
13237

    
13238
    # Wait for client to close
13239
    try:
13240
      try:
13241
        # pylint: disable=E1101
13242
        # Instance of '_socketobject' has no ... member
13243
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13244
        conn.recv(1)
13245
      except socket.error, err:
13246
        raise errcls("Client failed to confirm notification (%s)" % err)
13247
    finally:
13248
      conn.close()
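
  # Client-side counterpart, as a rough sketch (illustrative only): a test
  # driver that received the socket path through the job feedback messages
  # can acknowledge the notification with something like
  #
  #   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   s.connect(sockname)   # unblocks sock.accept() above
  #   ...                   # perform whatever checks the test requires
  #   s.send("x")           # any byte unblocks conn.recv(1) above
  #   s.close()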
13249

    
13250
  def _SendNotification(self, test, arg, sockname):
13251
    """Sends a notification to the client.
13252

13253
    @type test: string
13254
    @param test: Test name
13255
    @param arg: Test argument (depends on test)
13256
    @type sockname: string
13257
    @param sockname: Socket path
13258

13259
    """
13260
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13261

    
13262
  def _Notify(self, prereq, test, arg):
13263
    """Notifies the client of a test.
13264

13265
    @type prereq: bool
13266
    @param prereq: Whether this is a prereq-phase test
13267
    @type test: string
13268
    @param test: Test name
13269
    @param arg: Test argument (depends on test)
13270

13271
    """
13272
    if prereq:
13273
      errcls = errors.OpPrereqError
13274
    else:
13275
      errcls = errors.OpExecError
13276

    
13277
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13278
                                                  test, arg),
13279
                                   errcls)
13280

    
13281
  def CheckArguments(self):
13282
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13283
    self.expandnames_calls = 0
13284

    
13285
  def ExpandNames(self):
13286
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13287
    if checkargs_calls < 1:
13288
      raise errors.ProgrammerError("CheckArguments was not called")
13289

    
13290
    self.expandnames_calls += 1
13291

    
13292
    if self.op.notify_waitlock:
13293
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13294

    
13295
    self.LogInfo("Expanding names")
13296

    
13297
    # Get lock on master node (just to get a lock, not for a particular reason)
13298
    self.needed_locks = {
13299
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13300
      }
13301

    
13302
  def Exec(self, feedback_fn):
13303
    if self.expandnames_calls < 1:
13304
      raise errors.ProgrammerError("ExpandNames was not called")
13305

    
13306
    if self.op.notify_exec:
13307
      self._Notify(False, constants.JQT_EXEC, None)
13308

    
13309
    self.LogInfo("Executing")
13310

    
13311
    if self.op.log_messages:
13312
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13313
      for idx, msg in enumerate(self.op.log_messages):
13314
        self.LogInfo("Sending log message %s", idx + 1)
13315
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13316
        # Report how many test messages have been sent
13317
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13318

    
13319
    if self.op.fail:
13320
      raise errors.OpExecError("Opcode failure was requested")
13321

    
13322
    return True
13323

    
13324

    
13325
class IAllocator(object):
13326
  """IAllocator framework.
13327

13328
  An IAllocator instance has the following sets of attributes:
13329
    - cfg that is needed to query the cluster
13330
    - input data (all members of the _KEYS class attribute are required)
13331
    - four buffer attributes (in|out_data|text), that represent the
13332
      input (to the external script) in text and data structure format,
13333
      and the output from it, again in two formats
13334
    - the result variables from the script (success, info, nodes) for
13335
      easy usage
13336

13337
  """
13338
  # pylint: disable=R0902
13339
  # lots of instance attributes
13340

    
13341
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13342
    self.cfg = cfg
13343
    self.rpc = rpc_runner
13344
    # init buffer variables
13345
    self.in_text = self.out_text = self.in_data = self.out_data = None
13346
    # init all input fields so that pylint is happy
13347
    self.mode = mode
13348
    self.memory = self.disks = self.disk_template = None
13349
    self.os = self.tags = self.nics = self.vcpus = None
13350
    self.hypervisor = None
13351
    self.relocate_from = None
13352
    self.name = None
13353
    self.instances = None
13354
    self.evac_mode = None
13355
    self.target_groups = []
13356
    # computed fields
13357
    self.required_nodes = None
13358
    # init result fields
13359
    self.success = self.info = self.result = None
13360

    
13361
    try:
13362
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13363
    except KeyError:
13364
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13365
                                   " IAllocator" % self.mode)
13366

    
13367
    keyset = [n for (n, _) in keydata]
13368

    
13369
    for key in kwargs:
13370
      if key not in keyset:
13371
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13372
                                     " IAllocator" % key)
13373
      setattr(self, key, kwargs[key])
13374

    
13375
    for key in keyset:
13376
      if key not in kwargs:
13377
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13378
                                     " IAllocator" % key)
13379
    self._BuildInputData(compat.partial(fn, self), keydata)
13380

    
13381
  def _ComputeClusterData(self):
13382
    """Compute the generic allocator input data.
13383

13384
    This is the data that is independent of the actual operation.
13385

13386
    """
13387
    cfg = self.cfg
13388
    cluster_info = cfg.GetClusterInfo()
13389
    # cluster data
13390
    data = {
13391
      "version": constants.IALLOCATOR_VERSION,
13392
      "cluster_name": cfg.GetClusterName(),
13393
      "cluster_tags": list(cluster_info.GetTags()),
13394
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13395
      # we don't have job IDs
13396
      }
13397
    ninfo = cfg.GetAllNodesInfo()
13398
    iinfo = cfg.GetAllInstancesInfo().values()
13399
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13400

    
13401
    # node data
13402
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13403

    
13404
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13405
      hypervisor_name = self.hypervisor
13406
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13407
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13408
    else:
13409
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13410

    
13411
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13412
                                        hypervisor_name)
13413
    node_iinfo = \
13414
      self.rpc.call_all_instances_info(node_list,
13415
                                       cluster_info.enabled_hypervisors)
13416

    
13417
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13418

    
13419
    config_ndata = self._ComputeBasicNodeData(ninfo)
13420
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13421
                                                 i_list, config_ndata)
13422
    assert len(data["nodes"]) == len(ninfo), \
13423
        "Incomplete node data computed"
13424

    
13425
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13426

    
13427
    self.in_data = data
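
  # For reference, a heavily trimmed sketch of the structure assembled into
  # self.in_data (the values shown are illustrative):
  #
  #   {"version": 2,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [],
  #    "enabled_hypervisors": ["xen-pvm"],
  #    "nodegroups": {"<uuid>": {"name": "default", "alloc_policy": ...}},
  #    "nodes": {...},      # static + dynamic per-node data, see below
  #    "instances": {...}}  # per-instance data, see _ComputeInstanceData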
13428

    
13429
  @staticmethod
13430
  def _ComputeNodeGroupData(cfg):
13431
    """Compute node groups data.
13432

13433
    """
13434
    ng = dict((guuid, {
13435
      "name": gdata.name,
13436
      "alloc_policy": gdata.alloc_policy,
13437
      })
13438
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13439

    
13440
    return ng
13441

    
13442
  @staticmethod
13443
  def _ComputeBasicNodeData(node_cfg):
13444
    """Compute global node data.
13445

13446
    @rtype: dict
13447
    @returns: a dict mapping node name to a dict of static (config) values
13448

13449
    """
13450
    # fill in static (config-based) values
13451
    node_results = dict((ninfo.name, {
13452
      "tags": list(ninfo.GetTags()),
13453
      "primary_ip": ninfo.primary_ip,
13454
      "secondary_ip": ninfo.secondary_ip,
13455
      "offline": ninfo.offline,
13456
      "drained": ninfo.drained,
13457
      "master_candidate": ninfo.master_candidate,
13458
      "group": ninfo.group,
13459
      "master_capable": ninfo.master_capable,
13460
      "vm_capable": ninfo.vm_capable,
13461
      })
13462
      for ninfo in node_cfg.values())
13463

    
13464
    return node_results
13465

    
13466
  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global dynamic node data.

    This merges the values reported at runtime by the nodes into the
    config-based structures.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # build the dynamic node result
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

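  # For online nodes, _ComputeDynamicNodeData above extends each node entry
  # with runtime values, for example (hypothetical numbers):
  #   "total_memory": 16384, "reserved_memory": 1024, "free_memory": 8192,
  #   "total_disk": 512000, "free_disk": 256000, "total_cpus": 8,
  #   "i_pri_memory": 4096, "i_pri_up_memory": 2048,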
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

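  # Sketch of the node-evacuation request built by _AddNodeEvacuate above
  # (instance names are hypothetical):
  #   {
  #     "type": constants.IALLOCATOR_MODE_NODE_EVAC,
  #     "instances": ["inst1.example.com", "inst2.example.com"],
  #     "evac_mode": constants.IALLOCATOR_NEVAC_ALL,
  #   }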
  def _AddChangeGroup(self):
    """Get data for group-change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

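  # Sketch of the group-change request built by _AddChangeGroup above
  # (hypothetical names):
  #   {
  #     "type": constants.IALLOCATOR_MODE_CHG_GROUP,
  #     "instances": ["inst1.example.com"],
  #     "target_groups": ["group2"],
  #   }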
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

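  # _BuildInputData above merges the per-mode request into in_data and
  # serializes the whole structure; trimmed, the resulting in_text looks
  # roughly like (a sketch, not a full protocol description):
  #   {"version": ..., "cluster_name": ..., "nodegroups": {...},
  #    "nodes": {...}, "instances": {...}, "request": {"type": ..., ...}}
  # The ht-based validators below describe the per-mode request keys and the
  # shape expected of the iallocator's "result" field.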
  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

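  # Sketch of a result matching _NEVAC_RESULT above, as used by the
  # node-evacuate and change-group modes (names and opcodes are made up):
  #   [
  #     [["inst1.example.com", "group2", ["node3.example.com"]]],  # moved
  #     [["inst2.example.com", "not enough memory"]],              # failed
  #     [[{"OP_ID": "OP_INSTANCE_MIGRATE", ...}]],                 # jobs
  #   ]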
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

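  # The iallocator script is expected to print a JSON document which
  # _ValidateResult below parses; a minimal well-formed example (the payload
  # of "result" depends on the request mode):
  #   {"success": true, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}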
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

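  # Worked example for _NodesToGroups below (hypothetical data): with
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  # the call _NodesToGroups(node2group, groups, ["node1", "node2", "node9"])
  # returns ["default", "uuid-b"]: unknown nodes are skipped, groups without
  # an entry fall back to their UUID, and the result is sorted.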
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


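# Usage sketch: _GetQueryImplementation(constants.QR_NODE) returns the
# _NodeQuery class registered above, while an unknown resource name raises
# OpPrereqError.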
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)