#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128

# States of instance
INSTANCE_UP = [constants.ADMINST_UP]
INSTANCE_DOWN = [constants.ADMINST_DOWN]
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcode.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs

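# Illustrative usage sketch (not part of the original module): an LU's Exec
# method can hand follow-up work to the job queue by returning ResultWithJobs;
# each inner list becomes one job, and extra keyword arguments are stored in
# C{other} as additional return values, e.g.
#
#   def Exec(self, feedback_fn):
#     ops = [...]  # opcode objects computed by this LU, e.g. one per group
#     return ResultWithJobs([ops], hypothetical_note="extra return value")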
class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. If there are no nodes, an empty
      list should be returned (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instances' nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]

class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")

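# Illustrative sketch (not a real LU of this module): a concrete logical unit
# overrides the methods documented above, much like LUClusterVerify further
# below; more complex LUs can instead populate self.tasklets with instances of
# the Tasklet class defined next. The name LUExampleNoop and its behaviour are
# made up for illustration only.
#
#   class LUExampleNoop(NoHooksLU):
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self.needed_locks = {}          # this LU needs no locks
#
#     def Exec(self, feedback_fn):
#       feedback_fn("nothing to do")
#       return True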
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError

class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)

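# Usage sketch for _ShareAll above (illustrative, not taken from this file):
# an LU that only reads configuration data can request shared locks at every
# level with
#
#   self.share_locks = _ShareAll()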
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups are"
                               " '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy

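# Worked example for _GetUpdatedParams above (derived from its logic,
# illustration only): with the default use_default=True, VALUE_DEFAULT
# entries delete keys and everything else overrides or extends the old dict:
#
#   _GetUpdatedParams({"a": 1, "b": 2},
#                     {"a": constants.VALUE_DEFAULT, "c": 3})
#   => {"b": 2, "c": 3}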
def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  owned = lu.owned_locks(level)
  if not owned:
    # Not owning any lock at this level, do nothing
    pass

  elif should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in owned:
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"

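# Usage sketch for _ReleaseLocks above (illustrative): once an LU has narrowed
# down the nodes it really needs, it can drop the remaining node locks, e.g.
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=list(instance.all_nodes))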
def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)

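# Shape of the result of _MapInstanceDisksToNodes above (illustration, names
# made up): every volume of every instance becomes one entry, keyed by
# (node name, volume name), e.g.
#   {("node1.example.com", "some-volume"): "instance1.example.com", ...}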
def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)

def _CheckInstanceState(lu, instance, req_states, msg=None):
  """Ensure that an instance is in one of the required states.

  @param lu: the LU on behalf of which we make the check
  @param instance: the instance to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the instance is not in the required state

  """
  if msg is None:
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
  if instance.admin_state not in req_states:
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
                               (instance, instance.admin_state, msg),
                               errors.ECODE_STATE)

  if constants.ADMINST_UP not in req_states:
    pnode = instance.primary_node
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
                prereq=True, ecode=errors.ECODE_ENVIRON)

    if instance.name in ins_l.payload:
      raise errors.OpPrereqError("Instance %s is running, %s" %
                                 (instance.name, msg), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")

968
                          memory, vcpus, nics, disk_template, disks,
969
                          bep, hvp, hypervisor_name, tags):
970
  """Builds instance related env variables for hooks
971

972
  This builds the hook environment from individual variables.
973

974
  @type name: string
975
  @param name: the name of the instance
976
  @type primary_node: string
977
  @param primary_node: the name of the instance's primary node
978
  @type secondary_nodes: list
979
  @param secondary_nodes: list of secondary nodes as strings
980
  @type os_type: string
981
  @param os_type: the name of the instance's OS
982
  @type status: string
983
  @param status: the desired status of the instance
984
  @type memory: string
985
  @param memory: the memory size of the instance
986
  @type vcpus: string
987
  @param vcpus: the count of VCPUs the instance has
988
  @type nics: list
989
  @param nics: list of tuples (ip, mac, mode, link) representing
990
      the NICs the instance has
991
  @type disk_template: string
992
  @param disk_template: the disk template of the instance
993
  @type disks: list
994
  @param disks: the list of (size, mode) pairs
995
  @type bep: dict
996
  @param bep: the backend parameters for the instance
997
  @type hvp: dict
998
  @param hvp: the hypervisor parameters for the instance
999
  @type hypervisor_name: string
1000
  @param hypervisor_name: the hypervisor for the instance
1001
  @type tags: list
1002
  @param tags: list of instance tags as strings
1003
  @rtype: dict
1004
  @return: the hook environment for this instance
1005

1006
  """
1007
  env = {
1008
    "OP_TARGET": name,
1009
    "INSTANCE_NAME": name,
1010
    "INSTANCE_PRIMARY": primary_node,
1011
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1012
    "INSTANCE_OS_TYPE": os_type,
1013
    "INSTANCE_STATUS": status,
1014
    "INSTANCE_MEMORY": memory,
1015
    "INSTANCE_VCPUS": vcpus,
1016
    "INSTANCE_DISK_TEMPLATE": disk_template,
1017
    "INSTANCE_HYPERVISOR": hypervisor_name,
1018
  }
1019

    
1020
  if nics:
1021
    nic_count = len(nics)
1022
    for idx, (ip, mac, mode, link) in enumerate(nics):
1023
      if ip is None:
1024
        ip = ""
1025
      env["INSTANCE_NIC%d_IP" % idx] = ip
1026
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1027
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1028
      env["INSTANCE_NIC%d_LINK" % idx] = link
1029
      if mode == constants.NIC_MODE_BRIDGED:
1030
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1031
  else:
1032
    nic_count = 0
1033

    
1034
  env["INSTANCE_NIC_COUNT"] = nic_count
1035

    
1036
  if disks:
1037
    disk_count = len(disks)
1038
    for idx, (size, mode) in enumerate(disks):
1039
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1040
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1041
  else:
1042
    disk_count = 0
1043

    
1044
  env["INSTANCE_DISK_COUNT"] = disk_count
1045

    
1046
  if not tags:
1047
    tags = []
1048

    
1049
  env["INSTANCE_TAGS"] = " ".join(tags)
1050

    
1051
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1052
    for key, value in source.items():
1053
      env["INSTANCE_%s_%s" % (kind, key)] = value
1054

    
1055
  return env
1056

    
1057

    
1058
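# Resulting environment of _BuildInstanceHookEnv above (illustration): for a
# one-NIC, one-disk instance the returned dict contains keys such as
# INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC_COUNT, INSTANCE_NIC0_MAC,
# INSTANCE_DISK_COUNT and INSTANCE_DISK0_SIZE, plus INSTANCE_BE_<param> and
# INSTANCE_HV_<param> entries; the hooks runner later prefixes every key with
# "GANETI_".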
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_state,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)

def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both an iallocator and a node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator

class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    return master_params.name

def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data

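# Shape of the result of _GetAllHypervisorParameters above (illustration,
# names made up): a list of (origin, hypervisor, parameters) tuples such as
#   [("cluster", "kvm", {...}), ("os debian-image", "kvm", {...}),
#    ("instance instance1.example.com", "kvm", {...})]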
class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

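  # Message shapes produced by _Error above (illustration): with the opcode's
  # error_codes flag set the line is machine-parseable, otherwise it is meant
  # for humans; either way it is reported via feedback_fn as "  - <message>":
  #   "<severity>:<error code>:<item type>:<item>:<text>"
  #   "<severity>: <item type> <item>: <text>"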
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1498
    """Log an error message if the passed condition is True.
1499

1500
    """
1501
    cond = (bool(cond)
1502
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1503

    
1504
    # If the error code is in the list of ignored errors, demote the error to a
1505
    # warning
1506
    (_, etxt, _) = ecode
1507
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1508
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1509

    
1510
    if cond:
1511
      self._Error(ecode, *args, **kwargs)
1512

    
1513
    # do not mark the operation as failed for WARN cases only
1514
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1515
      self.bad = self.bad or cond
1516

    
1517

    
1518
class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)
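
# Note on depends_fn above (editorial comment, not in the original source):
# [(-len(jobs), [])] expresses a relative job dependency.  Because the
# generator passed to jobs.extend() calls depends_fn() as each group job is
# produced, -len(jobs) always points back to the first job of this submission,
# i.e. the OpClusterVerifyConfig job, so every group verification waits for
# the global configuration check.
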
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad
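
# For example (illustrative, not from the original source), a dangling node
# with two instances would be rendered by pretty_dangling above as
# "node3.example.com (inst1.example.com, inst2.example.com)", and a dangling
# node without instances as "node3.example.com (no instances)".
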
class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by the hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}
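
  # NodeImage objects are created by Exec() further below, keyed by node name
  # in a node_image dict; for instance (illustrative names, not from the
  # original source) an offline node ends up as
  #   node_image["node2.example.com"] = self.NodeImage(offline=True,
  #                                                    name="node2.example.com",
  #                                                    vm_capable=True)
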
  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)
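
  # Example of the lock fix-up above (illustrative, not in the original
  # source): if a DRBD instance has its primary node in this group and its
  # secondary in another group, DeclareLocks() adds the secondary to
  # LEVEL_NODE and CheckPrereq() records it in self.extra_lv_nodes so its LVs
  # can be queried separately in Exec().
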
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
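
  # The "version" entry checked above is expected to be a two-element
  # sequence, conceptually (constants.PROTOCOL_VERSION,
  # constants.RELEASE_VERSION) as reported by the remote node; only the
  # protocol part must match exactly, while a differing release version merely
  # triggers a warning (editorial note, not part of the original source).
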
  def _VerifyNodeTime(self, ninfo, nresult,
1872
                      nvinfo_starttime, nvinfo_endtime):
1873
    """Check the node time.
1874

1875
    @type ninfo: L{objects.Node}
1876
    @param ninfo: the node to check
1877
    @param nresult: the remote results for the node
1878
    @param nvinfo_starttime: the start time of the RPC call
1879
    @param nvinfo_endtime: the end time of the RPC call
1880

1881
    """
1882
    node = ninfo.name
1883
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1884

    
1885
    ntime = nresult.get(constants.NV_TIME, None)
1886
    try:
1887
      ntime_merged = utils.MergeTime(ntime)
1888
    except (ValueError, TypeError):
1889
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1890
      return
1891

    
1892
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1893
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1894
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1895
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1896
    else:
1897
      ntime_diff = None
1898

    
1899
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1900
             "Node time diverges by at least %s from master node time",
1901
             ntime_diff)
1902

    
1903
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1904
    """Check the node LVM results.
1905

1906
    @type ninfo: L{objects.Node}
1907
    @param ninfo: the node to check
1908
    @param nresult: the remote results for the node
1909
    @param vg_name: the configured VG name
1910

1911
    """
1912
    if vg_name is None:
1913
      return
1914

    
1915
    node = ninfo.name
1916
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1917

    
1918
    # checks vg existence and size > 20G
1919
    vglist = nresult.get(constants.NV_VGLIST, None)
1920
    test = not vglist
1921
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1922
    if not test:
1923
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1924
                                            constants.MIN_VG_SIZE)
1925
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1926

    
1927
    # check pv names
1928
    pvlist = nresult.get(constants.NV_PVLIST, None)
1929
    test = pvlist is None
1930
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1931
    if not test:
1932
      # check that ':' is not present in PV names, since it's a
1933
      # special character for lvcreate (denotes the range of PEs to
1934
      # use on the PV)
1935
      for _, pvname, owner_vg in pvlist:
1936
        test = ":" in pvname
1937
        _ErrorIf(test, constants.CV_ENODELVM, node,
1938
                 "Invalid character ':' in PV '%s' of VG '%s'",
1939
                 pvname, owner_vg)
1940

    
1941
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1942
    """Check the node bridges.
1943

1944
    @type ninfo: L{objects.Node}
1945
    @param ninfo: the node to check
1946
    @param nresult: the remote results for the node
1947
    @param bridges: the expected list of bridges
1948

1949
    """
1950
    if not bridges:
1951
      return
1952

    
1953
    node = ninfo.name
1954
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1955

    
1956
    missing = nresult.get(constants.NV_BRIDGES, None)
1957
    test = not isinstance(missing, list)
1958
    _ErrorIf(test, constants.CV_ENODENET, node,
1959
             "did not return valid bridge information")
1960
    if not test:
1961
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1962
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1963

    
1964
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1965
    """Check the results of user scripts presence and executability on the node
1966

1967
    @type ninfo: L{objects.Node}
1968
    @param ninfo: the node to check
1969
    @param nresult: the remote results for the node
1970

1971
    """
1972
    node = ninfo.name
1973

    
1974
    test = not constants.NV_USERSCRIPTS in nresult
1975
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1976
                  "did not return user scripts information")
1977

    
1978
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1979
    if not test:
1980
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1981
                    "user scripts not present or not executable: %s" %
1982
                    utils.CommaJoin(sorted(broken_scripts)))
1983

    
1984
  def _VerifyNodeNetwork(self, ninfo, nresult):
1985
    """Check the node network connectivity results.
1986

1987
    @type ninfo: L{objects.Node}
1988
    @param ninfo: the node to check
1989
    @param nresult: the remote results for the node
1990

1991
    """
1992
    node = ninfo.name
1993
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1994

    
1995
    test = constants.NV_NODELIST not in nresult
1996
    _ErrorIf(test, constants.CV_ENODESSH, node,
1997
             "node hasn't returned node ssh connectivity data")
1998
    if not test:
1999
      if nresult[constants.NV_NODELIST]:
2000
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
2001
          _ErrorIf(True, constants.CV_ENODESSH, node,
2002
                   "ssh communication with node '%s': %s", a_node, a_msg)
2003

    
2004
    test = constants.NV_NODENETTEST not in nresult
2005
    _ErrorIf(test, constants.CV_ENODENET, node,
2006
             "node hasn't returned node tcp connectivity data")
2007
    if not test:
2008
      if nresult[constants.NV_NODENETTEST]:
2009
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
2010
        for anode in nlist:
2011
          _ErrorIf(True, constants.CV_ENODENET, node,
2012
                   "tcp communication with node '%s': %s",
2013
                   anode, nresult[constants.NV_NODENETTEST][anode])
2014

    
2015
    test = constants.NV_MASTERIP not in nresult
2016
    _ErrorIf(test, constants.CV_ENODENET, node,
2017
             "node hasn't returned node master IP reachability data")
2018
    if not test:
2019
      if not nresult[constants.NV_MASTERIP]:
2020
        if node == self.master_node:
2021
          msg = "the master node cannot reach the master IP (not configured?)"
2022
        else:
2023
          msg = "cannot reach the master IP"
2024
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2025

    
2026
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2027
                      diskstatus):
2028
    """Verify an instance.
2029

2030
    This function checks to see if the required block devices are
2031
    available on the instance's node.
2032

2033
    """
2034
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2035
    node_current = instanceconfig.primary_node
2036

    
2037
    node_vol_should = {}
2038
    instanceconfig.MapLVsByNode(node_vol_should)
2039

    
2040
    for node in node_vol_should:
2041
      n_img = node_image[node]
2042
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2043
        # ignore missing volumes on offline or broken nodes
2044
        continue
2045
      for volume in node_vol_should[node]:
2046
        test = volume not in n_img.volumes
2047
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2048
                 "volume %s missing on node %s", volume, node)
2049

    
2050
    if instanceconfig.admin_state == constants.ADMINST_UP:
2051
      pri_img = node_image[node_current]
2052
      test = instance not in pri_img.instances and not pri_img.offline
2053
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2054
               "instance not running on its primary node %s",
2055
               node_current)
2056

    
2057
    diskdata = [(nname, success, status, idx)
2058
                for (nname, disks) in diskstatus.items()
2059
                for idx, (success, status) in enumerate(disks)]
2060

    
2061
    for nname, success, bdev_status, idx in diskdata:
2062
      # the 'ghost node' construction in Exec() ensures that we have a
2063
      # node here
2064
      snode = node_image[nname]
2065
      bad_snode = snode.ghost or snode.offline
2066
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
2067
               not success and not bad_snode,
2068
               constants.CV_EINSTANCEFAULTYDISK, instance,
2069
               "couldn't retrieve status for disk/%s on %s: %s",
2070
               idx, nname, bdev_status)
2071
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
2072
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
2073
               constants.CV_EINSTANCEFAULTYDISK, instance,
2074
               "disk/%s on %s is faulty", idx, nname)
2075

    
2076
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2077
    """Verify if there are any unknown volumes in the cluster.
2078

2079
    The .os, .swap and backup volumes are ignored. All other volumes are
2080
    reported as unknown.
2081

2082
    @type reserved: L{ganeti.utils.FieldSet}
2083
    @param reserved: a FieldSet of reserved volume names
2084

2085
    """
2086
    for node, n_img in node_image.items():
2087
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2088
        # skip non-healthy nodes
2089
        continue
2090
      for volume in n_img.volumes:
2091
        test = ((node not in node_vol_should or
2092
                volume not in node_vol_should[node]) and
2093
                not reserved.Matches(volume))
2094
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2095
                      "volume %s is unknown", volume)
2096

    
2097
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2098
    """Verify N+1 Memory Resilience.
2099

2100
    Check that if one single node dies we can still start all the
2101
    instances it was primary for.
2102

2103
    """
2104
    cluster_info = self.cfg.GetClusterInfo()
2105
    for node, n_img in node_image.items():
2106
      # This code checks that every node which is now listed as
2107
      # secondary has enough memory to host all instances it is
2108
      # supposed to should a single other node in the cluster fail.
2109
      # FIXME: not ready for failover to an arbitrary node
2110
      # FIXME: does not support file-backed instances
2111
      # WARNING: we currently take into account down instances as well
2112
      # as up ones, considering that even if they're down someone
2113
      # might want to start them even in the event of a node failure.
2114
      if n_img.offline:
2115
        # we're skipping offline nodes from the N+1 warning, since
2116
        # most likely we don't have good memory information from them;
2117
        # we already list instances living on such nodes, and that's
2118
        # enough warning
2119
        continue
2120
      for prinode, instances in n_img.sbp.items():
2121
        needed_mem = 0
2122
        for instance in instances:
2123
          bep = cluster_info.FillBE(instance_cfg[instance])
2124
          if bep[constants.BE_AUTO_BALANCE]:
2125
            needed_mem += bep[constants.BE_MEMORY]
2126
        test = n_img.mfree < needed_mem
2127
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2128
                      "not enough memory to accomodate instance failovers"
2129
                      " should node %s fail (%dMiB needed, %dMiB available)",
2130
                      prinode, needed_mem, n_img.mfree)
2131
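
  # Worked example for the N+1 check above (illustrative figures, not from the
  # original source): if node A is secondary for two auto-balanced instances
  # whose primary is node B, needing 2048 and 4096 MiB of memory, the check
  # requires n_img.mfree on A to be at least 6144 MiB, otherwise a CV_ENODEN1
  # error is reported for A.
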

    
2132
  @classmethod
2133
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2134
                   (files_all, files_opt, files_mc, files_vm)):
2135
    """Verifies file checksums collected from all nodes.
2136

2137
    @param errorif: Callback for reporting errors
2138
    @param nodeinfo: List of L{objects.Node} objects
2139
    @param master_node: Name of master node
2140
    @param all_nvinfo: RPC results
2141

2142
    """
2143
    # Define functions determining which nodes to consider for a file
2144
    files2nodefn = [
2145
      (files_all, None),
2146
      (files_mc, lambda node: (node.master_candidate or
2147
                               node.name == master_node)),
2148
      (files_vm, lambda node: node.vm_capable),
2149
      ]
2150

    
2151
    # Build mapping from filename to list of nodes which should have the file
2152
    nodefiles = {}
2153
    for (files, fn) in files2nodefn:
2154
      if fn is None:
2155
        filenodes = nodeinfo
2156
      else:
2157
        filenodes = filter(fn, nodeinfo)
2158
      nodefiles.update((filename,
2159
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2160
                       for filename in files)
2161

    
2162
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2163

    
2164
    fileinfo = dict((filename, {}) for filename in nodefiles)
2165
    ignore_nodes = set()
2166

    
2167
    for node in nodeinfo:
2168
      if node.offline:
2169
        ignore_nodes.add(node.name)
2170
        continue
2171

    
2172
      nresult = all_nvinfo[node.name]
2173

    
2174
      if nresult.fail_msg or not nresult.payload:
2175
        node_files = None
2176
      else:
2177
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2178

    
2179
      test = not (node_files and isinstance(node_files, dict))
2180
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2181
              "Node did not return file checksum data")
2182
      if test:
2183
        ignore_nodes.add(node.name)
2184
        continue
2185

    
2186
      # Build per-checksum mapping from filename to nodes having it
2187
      for (filename, checksum) in node_files.items():
2188
        assert filename in nodefiles
2189
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2190

    
2191
    for (filename, checksums) in fileinfo.items():
2192
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2193

    
2194
      # Nodes having the file
2195
      with_file = frozenset(node_name
2196
                            for nodes in fileinfo[filename].values()
2197
                            for node_name in nodes) - ignore_nodes
2198

    
2199
      expected_nodes = nodefiles[filename] - ignore_nodes
2200

    
2201
      # Nodes missing file
2202
      missing_file = expected_nodes - with_file
2203

    
2204
      if filename in files_opt:
2205
        # All or no nodes
2206
        errorif(missing_file and missing_file != expected_nodes,
2207
                constants.CV_ECLUSTERFILECHECK, None,
2208
                "File %s is optional, but it must exist on all or no"
2209
                " nodes (not found on %s)",
2210
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2211
      else:
2212
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2213
                "File %s is missing from node(s) %s", filename,
2214
                utils.CommaJoin(utils.NiceSort(missing_file)))
2215

    
2216
        # Warn if a node has a file it shouldn't
2217
        unexpected = with_file - expected_nodes
2218
        errorif(unexpected,
2219
                constants.CV_ECLUSTERFILECHECK, None,
2220
                "File %s should not exist on node(s) %s",
2221
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2222

    
2223
      # See if there are multiple versions of the file
2224
      test = len(checksums) > 1
2225
      if test:
2226
        variants = ["variant %s on %s" %
2227
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2228
                    for (idx, (checksum, nodes)) in
2229
                      enumerate(sorted(checksums.items()))]
2230
      else:
2231
        variants = []
2232

    
2233
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2234
              "File %s found with %s different checksums (%s)",
2235
              filename, len(checksums), "; ".join(variants))
2236
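
  # Shape of the data used above (illustrative, not from the original source):
  #   nodefiles = {filename: frozenset(node names expected to have the file)}
  #   fileinfo  = {filename: {checksum: set(node names reporting it)}}
  # so a file that accumulates more than one checksum key is reported as
  # having multiple variants.
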

    
2237
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2238
                      drbd_map):
2239
    """Verifies and the node DRBD status.
2240

2241
    @type ninfo: L{objects.Node}
2242
    @param ninfo: the node to check
2243
    @param nresult: the remote results for the node
2244
    @param instanceinfo: the dict of instances
2245
    @param drbd_helper: the configured DRBD usermode helper
2246
    @param drbd_map: the DRBD map as returned by
2247
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2248

2249
    """
2250
    node = ninfo.name
2251
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2252

    
2253
    if drbd_helper:
2254
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2255
      test = (helper_result == None)
2256
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2257
               "no drbd usermode helper returned")
2258
      if helper_result:
2259
        status, payload = helper_result
2260
        test = not status
2261
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2262
                 "drbd usermode helper check unsuccessful: %s", payload)
2263
        test = status and (payload != drbd_helper)
2264
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2265
                 "wrong drbd usermode helper: %s", payload)
2266

    
2267
    # compute the DRBD minors
2268
    node_drbd = {}
2269
    for minor, instance in drbd_map[node].items():
2270
      test = instance not in instanceinfo
2271
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2272
               "ghost instance '%s' in temporary DRBD map", instance)
2273
        # ghost instance should not be running, but otherwise we
2274
        # don't give double warnings (both ghost instance and
2275
        # unallocated minor in use)
2276
      if test:
2277
        node_drbd[minor] = (instance, False)
2278
      else:
2279
        instance = instanceinfo[instance]
2280
        node_drbd[minor] = (instance.name,
2281
                            instance.admin_state == constants.ADMINST_UP)
2282

    
2283
    # and now check them
2284
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2285
    test = not isinstance(used_minors, (tuple, list))
2286
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2287
             "cannot parse drbd status file: %s", str(used_minors))
2288
    if test:
2289
      # we cannot check drbd status
2290
      return
2291

    
2292
    for minor, (iname, must_exist) in node_drbd.items():
2293
      test = minor not in used_minors and must_exist
2294
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2295
               "drbd minor %d of instance %s is not active", minor, iname)
2296
    for minor in used_minors:
2297
      test = minor not in node_drbd
2298
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2299
               "unallocated drbd minor %d is in use", minor)
2300

    
2301
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2302
    """Builds the node OS structures.
2303

2304
    @type ninfo: L{objects.Node}
2305
    @param ninfo: the node to check
2306
    @param nresult: the remote results for the node
2307
    @param nimg: the node image object
2308

2309
    """
2310
    node = ninfo.name
2311
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2312

    
2313
    remote_os = nresult.get(constants.NV_OSLIST, None)
2314
    test = (not isinstance(remote_os, list) or
2315
            not compat.all(isinstance(v, list) and len(v) == 7
2316
                           for v in remote_os))
2317

    
2318
    _ErrorIf(test, constants.CV_ENODEOS, node,
2319
             "node hasn't returned valid OS data")
2320

    
2321
    nimg.os_fail = test
2322

    
2323
    if test:
2324
      return
2325

    
2326
    os_dict = {}
2327

    
2328
    for (name, os_path, status, diagnose,
2329
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2330

    
2331
      if name not in os_dict:
2332
        os_dict[name] = []
2333

    
2334
      # parameters is a list of lists instead of list of tuples due to
2335
      # JSON lacking a real tuple type, fix it:
2336
      parameters = [tuple(v) for v in parameters]
2337
      os_dict[name].append((os_path, status, diagnose,
2338
                            set(variants), set(parameters), set(api_ver)))
2339

    
2340
    nimg.oslist = os_dict
2341

    
2342
  def _VerifyNodeOS(self, ninfo, nimg, base):
2343
    """Verifies the node OS list.
2344

2345
    @type ninfo: L{objects.Node}
2346
    @param ninfo: the node to check
2347
    @param nimg: the node image object
2348
    @param base: the 'template' node we match against (e.g. from the master)
2349

2350
    """
2351
    node = ninfo.name
2352
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2353

    
2354
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2355

    
2356
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2357
    for os_name, os_data in nimg.oslist.items():
2358
      assert os_data, "Empty OS status for OS %s?!" % os_name
2359
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2360
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2361
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2362
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2363
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2364
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2365
      # comparisons with the 'base' image
2366
      test = os_name not in base.oslist
2367
      _ErrorIf(test, constants.CV_ENODEOS, node,
2368
               "Extra OS %s not present on reference node (%s)",
2369
               os_name, base.name)
2370
      if test:
2371
        continue
2372
      assert base.oslist[os_name], "Base node has empty OS status?"
2373
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2374
      if not b_status:
2375
        # base OS is invalid, skipping
2376
        continue
2377
      for kind, a, b in [("API version", f_api, b_api),
2378
                         ("variants list", f_var, b_var),
2379
                         ("parameters", beautify_params(f_param),
2380
                          beautify_params(b_param))]:
2381
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2382
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2383
                 kind, os_name, base.name,
2384
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2385

    
2386
    # check any missing OSes
2387
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2388
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2389
             "OSes present on reference node %s but missing on this node: %s",
2390
             base.name, utils.CommaJoin(missing))
2391

    
2392
  def _VerifyOob(self, ninfo, nresult):
2393
    """Verifies out of band functionality of a node.
2394

2395
    @type ninfo: L{objects.Node}
2396
    @param ninfo: the node to check
2397
    @param nresult: the remote results for the node
2398

2399
    """
2400
    node = ninfo.name
2401
    # We just have to verify the paths on master and/or master candidates
2402
    # as the oob helper is invoked on the master
2403
    if ((ninfo.master_candidate or ninfo.master_capable) and
2404
        constants.NV_OOB_PATHS in nresult):
2405
      for path_result in nresult[constants.NV_OOB_PATHS]:
2406
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2407

    
2408
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2409
    """Verifies and updates the node volume data.
2410

2411
    This function will update a L{NodeImage}'s internal structures
2412
    with data from the remote call.
2413

2414
    @type ninfo: L{objects.Node}
2415
    @param ninfo: the node to check
2416
    @param nresult: the remote results for the node
2417
    @param nimg: the node image object
2418
    @param vg_name: the configured VG name
2419

2420
    """
2421
    node = ninfo.name
2422
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2423

    
2424
    nimg.lvm_fail = True
2425
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2426
    if vg_name is None:
2427
      pass
2428
    elif isinstance(lvdata, basestring):
2429
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2430
               utils.SafeEncode(lvdata))
2431
    elif not isinstance(lvdata, dict):
2432
      _ErrorIf(True, constants.CV_ENODELVM, node,
2433
               "rpc call to node failed (lvlist)")
2434
    else:
2435
      nimg.volumes = lvdata
2436
      nimg.lvm_fail = False
2437

    
2438
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2439
    """Verifies and updates the node instance list.
2440

2441
    If the listing was successful, then updates this node's instance
2442
    list. Otherwise, it marks the RPC call as failed for the instance
2443
    list key.
2444

2445
    @type ninfo: L{objects.Node}
2446
    @param ninfo: the node to check
2447
    @param nresult: the remote results for the node
2448
    @param nimg: the node image object
2449

2450
    """
2451
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2452
    test = not isinstance(idata, list)
2453
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2454
                  "rpc call to node failed (instancelist): %s",
2455
                  utils.SafeEncode(str(idata)))
2456
    if test:
2457
      nimg.hyp_fail = True
2458
    else:
2459
      nimg.instances = idata
2460

    
2461
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2462
    """Verifies and computes a node information map
2463

2464
    @type ninfo: L{objects.Node}
2465
    @param ninfo: the node to check
2466
    @param nresult: the remote results for the node
2467
    @param nimg: the node image object
2468
    @param vg_name: the configured VG name
2469

2470
    """
2471
    node = ninfo.name
2472
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2473

    
2474
    # try to read free memory (from the hypervisor)
2475
    hv_info = nresult.get(constants.NV_HVINFO, None)
2476
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2477
    _ErrorIf(test, constants.CV_ENODEHV, node,
2478
             "rpc call to node failed (hvinfo)")
2479
    if not test:
2480
      try:
2481
        nimg.mfree = int(hv_info["memory_free"])
2482
      except (ValueError, TypeError):
2483
        _ErrorIf(True, constants.CV_ENODERPC, node,
2484
                 "node returned invalid nodeinfo, check hypervisor")
2485

    
2486
    # FIXME: devise a free space model for file based instances as well
2487
    if vg_name is not None:
2488
      test = (constants.NV_VGLIST not in nresult or
2489
              vg_name not in nresult[constants.NV_VGLIST])
2490
      _ErrorIf(test, constants.CV_ENODELVM, node,
2491
               "node didn't return data for the volume group '%s'"
2492
               " - it is either missing or broken", vg_name)
2493
      if not test:
2494
        try:
2495
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2496
        except (ValueError, TypeError):
2497
          _ErrorIf(True, constants.CV_ENODERPC, node,
2498
                   "node returned invalid LVM info, check LVM status")
2499

    
2500
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2501
    """Gets per-disk status information for all instances.
2502

2503
    @type nodelist: list of strings
2504
    @param nodelist: Node names
2505
    @type node_image: dict of (name, L{objects.Node})
2506
    @param node_image: Node objects
2507
    @type instanceinfo: dict of (name, L{objects.Instance})
2508
    @param instanceinfo: Instance objects
2509
    @rtype: {instance: {node: [(success, payload)]}}
2510
    @return: a dictionary of per-instance dictionaries with nodes as
2511
        keys and disk information as values; the disk information is a
2512
        list of tuples (success, payload)
2513

2514
    """
2515
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2516

    
2517
    node_disks = {}
2518
    node_disks_devonly = {}
2519
    diskless_instances = set()
2520
    diskless = constants.DT_DISKLESS
2521

    
2522
    for nname in nodelist:
2523
      node_instances = list(itertools.chain(node_image[nname].pinst,
2524
                                            node_image[nname].sinst))
2525
      diskless_instances.update(inst for inst in node_instances
2526
                                if instanceinfo[inst].disk_template == diskless)
2527
      disks = [(inst, disk)
2528
               for inst in node_instances
2529
               for disk in instanceinfo[inst].disks]
2530

    
2531
      if not disks:
2532
        # No need to collect data
2533
        continue
2534

    
2535
      node_disks[nname] = disks
2536

    
2537
      # Creating copies as SetDiskID below will modify the objects and that can
2538
      # lead to incorrect data returned from nodes
2539
      devonly = [dev.Copy() for (_, dev) in disks]
2540

    
2541
      for dev in devonly:
2542
        self.cfg.SetDiskID(dev, nname)
2543

    
2544
      node_disks_devonly[nname] = devonly
2545

    
2546
    assert len(node_disks) == len(node_disks_devonly)
2547

    
2548
    # Collect data from all nodes with disks
2549
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2550
                                                          node_disks_devonly)
2551

    
2552
    assert len(result) == len(node_disks)
2553

    
2554
    instdisk = {}
2555

    
2556
    for (nname, nres) in result.items():
2557
      disks = node_disks[nname]
2558

    
2559
      if nres.offline:
2560
        # No data from this node
2561
        data = len(disks) * [(False, "node offline")]
2562
      else:
2563
        msg = nres.fail_msg
2564
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2565
                 "while getting disk information: %s", msg)
2566
        if msg:
2567
          # No data from this node
2568
          data = len(disks) * [(False, msg)]
2569
        else:
2570
          data = []
2571
          for idx, i in enumerate(nres.payload):
2572
            if isinstance(i, (tuple, list)) and len(i) == 2:
2573
              data.append(i)
2574
            else:
2575
              logging.warning("Invalid result from node %s, entry %d: %s",
2576
                              nname, idx, i)
2577
              data.append((False, "Invalid result from the remote node"))
2578

    
2579
      for ((inst, _), status) in zip(disks, data):
2580
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2581

    
2582
    # Add empty entries for diskless instances.
2583
    for inst in diskless_instances:
2584
      assert inst not in instdisk
2585
      instdisk[inst] = {}
2586

    
2587
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2588
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2589
                      compat.all(isinstance(s, (tuple, list)) and
2590
                                 len(s) == 2 for s in statuses)
2591
                      for inst, nnames in instdisk.items()
2592
                      for nname, statuses in nnames.items())
2593
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2594

    
2595
    return instdisk
2596

    
2597
  @staticmethod
2598
  def _SshNodeSelector(group_uuid, all_nodes):
2599
    """Create endless iterators for all potential SSH check hosts.
2600

2601
    """
2602
    nodes = [node for node in all_nodes
2603
             if (node.group != group_uuid and
2604
                 not node.offline)]
2605
    keyfunc = operator.attrgetter("group")
2606

    
2607
    return map(itertools.cycle,
2608
               [sorted(map(operator.attrgetter("name"), names))
2609
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2610
                                                  keyfunc)])
2611

    
2612
  @classmethod
2613
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2614
    """Choose which nodes should talk to which other nodes.
2615

2616
    We will make nodes contact all nodes in their group, and one node from
2617
    every other group.
2618

2619
    @warning: This algorithm has a known issue if one node group is much
2620
      smaller than others (e.g. just one node). In such a case all other
2621
      nodes will talk to the single node.
2622

2623
    """
2624
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2625
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2626

    
2627
    return (online_nodes,
2628
            dict((name, sorted([i.next() for i in sel]))
2629
                 for name in online_nodes))
2630
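
  # Return shape (illustrative, not from the original source): a pair of
  #   (online node names in this group,
  #    {node name: sorted list with one peer picked from every other group})
  # which is what the NV_NODELIST ssh connectivity check below consumes.
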

    
2631
  def BuildHooksEnv(self):
2632
    """Build hooks env.
2633

2634
    Cluster-Verify hooks just ran in the post phase and their failure makes
2635
    the output be logged in the verify output and the verification to fail.
2636

2637
    """
2638
    env = {
2639
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2640
      }
2641

    
2642
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2643
               for node in self.my_node_info.values())
2644

    
2645
    return env
2646

    
2647
  def BuildHooksNodes(self):
2648
    """Build hooks nodes.
2649

2650
    """
2651
    return ([], self.my_node_names)
2652

    
2653
  def Exec(self, feedback_fn):
2654
    """Verify integrity of the node group, performing various test on nodes.
2655

2656
    """
2657
    # This method has too many local variables. pylint: disable=R0914
2658
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2659

    
2660
    if not self.my_node_names:
2661
      # empty node group
2662
      feedback_fn("* Empty node group, skipping verification")
2663
      return True
2664

    
2665
    self.bad = False
2666
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2667
    verbose = self.op.verbose
2668
    self._feedback_fn = feedback_fn
2669

    
2670
    vg_name = self.cfg.GetVGName()
2671
    drbd_helper = self.cfg.GetDRBDHelper()
2672
    cluster = self.cfg.GetClusterInfo()
2673
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2674
    hypervisors = cluster.enabled_hypervisors
2675
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2676

    
2677
    i_non_redundant = [] # Non redundant instances
2678
    i_non_a_balanced = [] # Non auto-balanced instances
2679
    i_offline = 0 # Count of offline instances
2680
    n_offline = 0 # Count of offline nodes
2681
    n_drained = 0 # Count of nodes being drained
2682
    node_vol_should = {}
2683

    
2684
    # FIXME: verify OS list
2685

    
2686
    # File verification
2687
    filemap = _ComputeAncillaryFiles(cluster, False)
2688

    
2689
    # do local checksums
2690
    master_node = self.master_node = self.cfg.GetMasterNode()
2691
    master_ip = self.cfg.GetMasterIP()
2692

    
2693
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2694

    
2695
    user_scripts = []
2696
    if self.cfg.GetUseExternalMipScript():
2697
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2698

    
2699
    node_verify_param = {
2700
      constants.NV_FILELIST:
2701
        utils.UniqueSequence(filename
2702
                             for files in filemap
2703
                             for filename in files),
2704
      constants.NV_NODELIST:
2705
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2706
                                  self.all_node_info.values()),
2707
      constants.NV_HYPERVISOR: hypervisors,
2708
      constants.NV_HVPARAMS:
2709
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2710
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2711
                                 for node in node_data_list
2712
                                 if not node.offline],
2713
      constants.NV_INSTANCELIST: hypervisors,
2714
      constants.NV_VERSION: None,
2715
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2716
      constants.NV_NODESETUP: None,
2717
      constants.NV_TIME: None,
2718
      constants.NV_MASTERIP: (master_node, master_ip),
2719
      constants.NV_OSLIST: None,
2720
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2721
      constants.NV_USERSCRIPTS: user_scripts,
2722
      }
2723

    
2724
    if vg_name is not None:
2725
      node_verify_param[constants.NV_VGLIST] = None
2726
      node_verify_param[constants.NV_LVLIST] = vg_name
2727
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2728
      node_verify_param[constants.NV_DRBDLIST] = None
2729

    
2730
    if drbd_helper:
2731
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2732

    
2733
    # bridge checks
2734
    # FIXME: this needs to be changed per node-group, not cluster-wide
2735
    bridges = set()
2736
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2737
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2738
      bridges.add(default_nicpp[constants.NIC_LINK])
2739
    for instance in self.my_inst_info.values():
2740
      for nic in instance.nics:
2741
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2742
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2743
          bridges.add(full_nic[constants.NIC_LINK])
2744

    
2745
    if bridges:
2746
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2747

    
2748
    # Build our expected cluster state
2749
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2750
                                                 name=node.name,
2751
                                                 vm_capable=node.vm_capable))
2752
                      for node in node_data_list)
2753

    
2754
    # Gather OOB paths
2755
    oob_paths = []
2756
    for node in self.all_node_info.values():
2757
      path = _SupportsOob(self.cfg, node)
2758
      if path and path not in oob_paths:
2759
        oob_paths.append(path)
2760

    
2761
    if oob_paths:
2762
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2763

    
2764
    for instance in self.my_inst_names:
2765
      inst_config = self.my_inst_info[instance]
2766

    
2767
      for nname in inst_config.all_nodes:
2768
        if nname not in node_image:
2769
          gnode = self.NodeImage(name=nname)
2770
          gnode.ghost = (nname not in self.all_node_info)
2771
          node_image[nname] = gnode
2772

    
2773
      inst_config.MapLVsByNode(node_vol_should)
2774

    
2775
      pnode = inst_config.primary_node
2776
      node_image[pnode].pinst.append(instance)
2777

    
2778
      for snode in inst_config.secondary_nodes:
2779
        nimg = node_image[snode]
2780
        nimg.sinst.append(instance)
2781
        if pnode not in nimg.sbp:
2782
          nimg.sbp[pnode] = []
2783
        nimg.sbp[pnode].append(instance)
2784

    
2785
    # At this point, we have the in-memory data structures complete,
2786
    # except for the runtime information, which we'll gather next
2787

    
2788
    # Due to the way our RPC system works, exact response times cannot be
2789
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2790
    # time before and after executing the request, we can at least have a time
2791
    # window.
2792
    nvinfo_starttime = time.time()
2793
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2794
                                           node_verify_param,
2795
                                           self.cfg.GetClusterName())
2796
    nvinfo_endtime = time.time()
2797

    
2798
    if self.extra_lv_nodes and vg_name is not None:
2799
      extra_lv_nvinfo = \
2800
          self.rpc.call_node_verify(self.extra_lv_nodes,
2801
                                    {constants.NV_LVLIST: vg_name},
2802
                                    self.cfg.GetClusterName())
2803
    else:
2804
      extra_lv_nvinfo = {}
2805

    
2806
    all_drbd_map = self.cfg.ComputeDRBDMap()
2807

    
2808
    feedback_fn("* Gathering disk information (%s nodes)" %
2809
                len(self.my_node_names))
2810
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2811
                                     self.my_inst_info)
2812

    
2813
    feedback_fn("* Verifying configuration file consistency")
2814

    
2815
    # If not all nodes are being checked, we need to make sure the master node
2816
    # and a non-checked vm_capable node are in the list.
2817
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2818
    if absent_nodes:
2819
      vf_nvinfo = all_nvinfo.copy()
2820
      vf_node_info = list(self.my_node_info.values())
2821
      additional_nodes = []
2822
      if master_node not in self.my_node_info:
2823
        additional_nodes.append(master_node)
2824
        vf_node_info.append(self.all_node_info[master_node])
2825
      # Add the first vm_capable node we find which is not included
2826
      for node in absent_nodes:
2827
        nodeinfo = self.all_node_info[node]
2828
        if nodeinfo.vm_capable and not nodeinfo.offline:
2829
          additional_nodes.append(node)
2830
          vf_node_info.append(self.all_node_info[node])
2831
          break
2832
      key = constants.NV_FILELIST
2833
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2834
                                                 {key: node_verify_param[key]},
2835
                                                 self.cfg.GetClusterName()))
2836
    else:
2837
      vf_nvinfo = all_nvinfo
2838
      vf_node_info = self.my_node_info.values()
2839

    
2840
    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2841

    
2842
    feedback_fn("* Verifying node status")
2843

    
2844
    refos_img = None
2845

    
2846
    for node_i in node_data_list:
2847
      node = node_i.name
2848
      nimg = node_image[node]
2849

    
2850
      if node_i.offline:
2851
        if verbose:
2852
          feedback_fn("* Skipping offline node %s" % (node,))
2853
        n_offline += 1
2854
        continue
2855

    
2856
      if node == master_node:
2857
        ntype = "master"
2858
      elif node_i.master_candidate:
2859
        ntype = "master candidate"
2860
      elif node_i.drained:
2861
        ntype = "drained"
2862
        n_drained += 1
2863
      else:
2864
        ntype = "regular"
2865
      if verbose:
2866
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2867

    
2868
      msg = all_nvinfo[node].fail_msg
2869
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2870
               msg)
2871
      if msg:
2872
        nimg.rpc_fail = True
2873
        continue
2874

    
2875
      nresult = all_nvinfo[node].payload
2876

    
2877
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2878
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2879
      self._VerifyNodeNetwork(node_i, nresult)
2880
      self._VerifyNodeUserScripts(node_i, nresult)
2881
      self._VerifyOob(node_i, nresult)
2882

    
2883
      if nimg.vm_capable:
2884
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2885
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2886
                             all_drbd_map)
2887

    
2888
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2889
        self._UpdateNodeInstances(node_i, nresult, nimg)
2890
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2891
        self._UpdateNodeOS(node_i, nresult, nimg)
2892

    
2893
        if not nimg.os_fail:
2894
          if refos_img is None:
2895
            refos_img = nimg
2896
          self._VerifyNodeOS(node_i, nimg, refos_img)
2897
        self._VerifyNodeBridges(node_i, nresult, bridges)
2898

    
2899
        # Check whether all running instances are primary for the node. (This
2900
        # can no longer be done from _VerifyInstance below, since some of the
2901
        # wrong instances could be from other node groups.)
2902
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2903

    
2904
        for inst in non_primary_inst:
2905
          # FIXME: investigate best way to handle offline insts
2906
          if inst.admin_state == constants.ADMINST_OFFLINE:
2907
            if verbose:
2908
              feedback_fn("* Skipping offline instance %s" % inst.name)
2909
            i_offline += 1
2910
            continue
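          # An instance we know about but that is not primary here is running
          # on the wrong node; one we do not know about at all is reported as
          # an orphan (the two checks below)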
2911
          test = inst in self.all_inst_info
2912
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2913
                   "instance should not run on node %s", node_i.name)
2914
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2915
                   "node is running unknown instance %s", inst)
2916

    
2917
    for node, result in extra_lv_nvinfo.items():
2918
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2919
                              node_image[node], vg_name)
2920

    
2921
    feedback_fn("* Verifying instance status")
2922
    for instance in self.my_inst_names:
2923
      if verbose:
2924
        feedback_fn("* Verifying instance %s" % instance)
2925
      inst_config = self.my_inst_info[instance]
2926
      self._VerifyInstance(instance, inst_config, node_image,
2927
                           instdisk[instance])
2928
      inst_nodes_offline = []
2929

    
2930
      pnode = inst_config.primary_node
2931
      pnode_img = node_image[pnode]
2932
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2933
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2934
               " primary node failed", instance)
2935

    
2936
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2937
               pnode_img.offline,
2938
               constants.CV_EINSTANCEBADNODE, instance,
2939
               "instance is marked as running and lives on offline node %s",
2940
               inst_config.primary_node)
2941

    
2942
      # If the instance is non-redundant we cannot survive losing its primary
2943
      # node, so we are not N+1 compliant. On the other hand we have no disk
2944
      # templates with more than one secondary so that situation is not well
2945
      # supported either.
2946
      # FIXME: does not support file-backed instances
2947
      if not inst_config.secondary_nodes:
2948
        i_non_redundant.append(instance)
2949

    
2950
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2951
               constants.CV_EINSTANCELAYOUT,
2952
               instance, "instance has multiple secondary nodes: %s",
2953
               utils.CommaJoin(inst_config.secondary_nodes),
2954
               code=self.ETYPE_WARNING)
2955

    
2956
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2957
        pnode = inst_config.primary_node
2958
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2959
        instance_groups = {}
2960

    
2961
        for node in instance_nodes:
2962
          instance_groups.setdefault(self.all_node_info[node].group,
2963
                                     []).append(node)
2964

    
2965
        pretty_list = [
2966
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2967
          # Sort so that we always list the primary node first.
2968
          for group, nodes in sorted(instance_groups.items(),
2969
                                     key=lambda (_, nodes): pnode in nodes,
2970
                                     reverse=True)]
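        # Sorting on the boolean "pnode in nodes" with reverse=True puts the
        # group containing the primary node first in the list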
2971

    
2972
        self._ErrorIf(len(instance_groups) > 1,
2973
                      constants.CV_EINSTANCESPLITGROUPS,
2974
                      instance, "instance has primary and secondary nodes in"
2975
                      " different groups: %s", utils.CommaJoin(pretty_list),
2976
                      code=self.ETYPE_WARNING)
2977

    
2978
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2979
        i_non_a_balanced.append(instance)
2980

    
2981
      for snode in inst_config.secondary_nodes:
2982
        s_img = node_image[snode]
2983
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2984
                 snode, "instance %s, connection to secondary node failed",
2985
                 instance)
2986

    
2987
        if s_img.offline:
2988
          inst_nodes_offline.append(snode)
2989

    
2990
      # warn that the instance lives on offline nodes
2991
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2992
               "instance has offline secondary node(s) %s",
2993
               utils.CommaJoin(inst_nodes_offline))
2994
      # ... or ghost/non-vm_capable nodes
2995
      for node in inst_config.all_nodes:
2996
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
2997
                 instance, "instance lives on ghost node %s", node)
2998
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
2999
                 instance, "instance lives on non-vm_capable node %s", node)
3000

    
3001
    feedback_fn("* Verifying orphan volumes")
3002
    reserved = utils.FieldSet(*cluster.reserved_lvs)
3003

    
3004
    # We will get spurious "unknown volume" warnings if any node of this group
3005
    # is secondary for an instance whose primary is in another group. To avoid
3006
    # them, we find these instances and add their volumes to node_vol_should.
3007
    for inst in self.all_inst_info.values():
3008
      for secondary in inst.secondary_nodes:
3009
        if (secondary in self.my_node_info
3010
            and inst.name not in self.my_inst_info):
3011
          inst.MapLVsByNode(node_vol_should)
3012
          break
3013

    
3014
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3015

    
3016
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3017
      feedback_fn("* Verifying N+1 Memory redundancy")
3018
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3019

    
3020
    feedback_fn("* Other Notes")
3021
    if i_non_redundant:
3022
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3023
                  % len(i_non_redundant))
3024

    
3025
    if i_non_a_balanced:
3026
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3027
                  % len(i_non_a_balanced))
3028

    
3029
    if i_offline:
3030
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3031

    
3032
    if n_offline:
3033
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3034

    
3035
    if n_drained:
3036
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3037

    
3038
    return not self.bad
3039

    
3040
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3041
    """Analyze the post-hooks' result
3042

3043
    This method analyses the hook result, handles it, and sends some
3044
    nicely-formatted feedback back to the user.
3045

3046
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3047
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3048
    @param hooks_results: the results of the multi-node hooks rpc call
3049
    @param feedback_fn: function used to send feedback back to the caller
3050
    @param lu_result: previous Exec result
3051
    @return: the new Exec result, based on the previous result
3052
        and hook results
3053

3054
    """
3055
    # We only really run POST phase hooks, only for non-empty groups,
3056
    # and are only interested in their results
3057
    if not self.my_node_names:
3058
      # empty node group
3059
      pass
3060
    elif phase == constants.HOOKS_PHASE_POST:
3061
      # Used to change hooks' output to proper indentation
3062
      feedback_fn("* Hooks Results")
3063
      assert hooks_results, "invalid result from hooks"
3064

    
3065
      for node_name in hooks_results:
3066
        res = hooks_results[node_name]
3067
        msg = res.fail_msg
3068
        test = msg and not res.offline
3069
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3070
                      "Communication failure in hooks execution: %s", msg)
3071
        if res.offline or msg:
3072
          # No need to investigate payload if node is offline or gave
3073
          # an error.
3074
          continue
3075
        for script, hkr, output in res.payload:
3076
          test = hkr == constants.HKR_FAIL
3077
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3078
                        "Script %s failed, output:", script)
3079
          if test:
3080
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3081
            feedback_fn("%s" % output)
3082
            lu_result = False
3083

    
3084
    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
3088
  """Verifies the cluster disks status.
3089

3090
  """
3091
  REQ_BGL = False
3092

    
3093
  def ExpandNames(self):
3094
    self.share_locks = _ShareAll()
3095
    self.needed_locks = {
3096
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3097
      }
3098

    
3099
  def Exec(self, feedback_fn):
3100
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
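    # All node groups were locked (shared) in ExpandNames, so this list
    # covers every group in the cluster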
3101

    
3102
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3103
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3104
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
3108
  """Verifies the status of all disks in a node group.
3109

3110
  """
3111
  REQ_BGL = False
3112

    
3113
  def ExpandNames(self):
3114
    # Raises errors.OpPrereqError on its own if group can't be found
3115
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3116

    
3117
    self.share_locks = _ShareAll()
3118
    self.needed_locks = {
3119
      locking.LEVEL_INSTANCE: [],
3120
      locking.LEVEL_NODEGROUP: [],
3121
      locking.LEVEL_NODE: [],
3122
      }
3123

    
3124
  def DeclareLocks(self, level):
3125
    if level == locking.LEVEL_INSTANCE:
3126
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3127

    
3128
      # Lock instances optimistically, needs verification once node and group
3129
      # locks have been acquired
3130
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3131
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3132

    
3133
    elif level == locking.LEVEL_NODEGROUP:
3134
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3135

    
3136
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3137
        set([self.group_uuid] +
3138
            # Lock all groups used by instances optimistically; this requires
3139
            # going via the node before it's locked, requiring verification
3140
            # later on
3141
            [group_uuid
3142
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3143
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3144

    
3145
    elif level == locking.LEVEL_NODE:
3146
      # This will only lock the nodes in the group to be verified which contain
3147
      # actual instances
3148
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3149
      self._LockInstancesNodes()
3150

    
3151
      # Lock all nodes in group to be verified
3152
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3153
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3154
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3155

    
3156
  def CheckPrereq(self):
3157
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3158
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3159
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3160

    
3161
    assert self.group_uuid in owned_groups
3162

    
3163
    # Check if locked instances are still correct
3164
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3165

    
3166
    # Get instance information
3167
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3168

    
3169
    # Check if node groups for locked instances are still correct
3170
    for (instance_name, inst) in self.instances.items():
3171
      assert owned_nodes.issuperset(inst.all_nodes), \
3172
        "Instance %s's nodes changed while we kept the lock" % instance_name
3173

    
3174
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3175
                                             owned_groups)
3176

    
3177
      assert self.group_uuid in inst_groups, \
3178
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3179

    
3180
  def Exec(self, feedback_fn):
3181
    """Verify integrity of cluster disks.
3182

3183
    @rtype: tuple of three items
3184
    @return: a tuple of (dict of node-to-node_error, list of instances
3185
        which need activate-disks, dict of instance: (node, volume) for
3186
        missing volumes)
3187

3188
    """
3189
    res_nodes = {}
3190
    res_instances = set()
3191
    res_missing = {}
3192

    
3193
    nv_dict = _MapInstanceDisksToNodes([inst
3194
            for inst in self.instances.values()
3195
            if inst.admin_state == constants.ADMINST_UP])
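    # nv_dict maps (node, lv_name) pairs to the owning instance, e.g.
    # (illustrative names): ("node1.example.com", "xenvg/disk0.data") -> inst1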
3196

    
3197
    if nv_dict:
3198
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3199
                             set(self.cfg.GetVmCapableNodeList()))
3200

    
3201
      node_lvs = self.rpc.call_lv_list(nodes, [])
3202

    
3203
      for (node, node_res) in node_lvs.items():
3204
        if node_res.offline:
3205
          continue
3206

    
3207
        msg = node_res.fail_msg
3208
        if msg:
3209
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3210
          res_nodes[node] = msg
3211
          continue
3212

    
3213
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3214
          inst = nv_dict.pop((node, lv_name), None)
3215
          if not (lv_online or inst is None):
3216
            res_instances.add(inst)
3217

    
3218
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3219
      # better
3220
      for key, inst in nv_dict.iteritems():
3221
        res_missing.setdefault(inst, []).append(list(key))
3222

    
3223
    return (res_nodes, list(res_instances), res_missing)


class LUClusterRepairDiskSizes(NoHooksLU):
3227
  """Verifies the cluster disks sizes.
3228

3229
  """
3230
  REQ_BGL = False
3231

    
3232
  def ExpandNames(self):
3233
    if self.op.instances:
3234
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3235
      self.needed_locks = {
3236
        locking.LEVEL_NODE_RES: [],
3237
        locking.LEVEL_INSTANCE: self.wanted_names,
3238
        }
3239
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3240
    else:
3241
      self.wanted_names = None
3242
      self.needed_locks = {
3243
        locking.LEVEL_NODE_RES: locking.ALL_SET,
3244
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3245
        }
3246
    self.share_locks = {
3247
      locking.LEVEL_NODE_RES: 1,
3248
      locking.LEVEL_INSTANCE: 0,
3249
      }
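    # A value of 1 requests a shared lock, 0 an exclusive one: node resources
    # are only read here, while instance configurations may be updated with
    # corrected disk sizes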
3250

    
3251
  def DeclareLocks(self, level):
3252
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3253
      self._LockInstancesNodes(primary_only=True, level=level)
3254

    
3255
  def CheckPrereq(self):
3256
    """Check prerequisites.
3257

3258
    This only checks the optional instance list against the existing names.
3259

3260
    """
3261
    if self.wanted_names is None:
3262
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3263

    
3264
    self.wanted_instances = \
3265
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3266

    
3267
  def _EnsureChildSizes(self, disk):
3268
    """Ensure children of the disk have the needed disk size.
3269

3270
    This is valid mainly for DRBD8 and fixes an issue where the
3271
    children have a smaller disk size.
3272

3273
    @param disk: an L{ganeti.objects.Disk} object
3274

3275
    """
3276
    if disk.dev_type == constants.LD_DRBD8:
3277
      assert disk.children, "Empty children for DRBD8?"
3278
      fchild = disk.children[0]
3279
      mismatch = fchild.size < disk.size
3280
      if mismatch:
3281
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3282
                     fchild.size, disk.size)
3283
        fchild.size = disk.size
3284

    
3285
      # and we recurse on this child only, not on the metadev
3286
      return self._EnsureChildSizes(fchild) or mismatch
3287
    else:
3288
      return False
3289

    
3290
  def Exec(self, feedback_fn):
3291
    """Verify the size of cluster disks.
3292

3293
    """
3294
    # TODO: check child disks too
3295
    # TODO: check differences in size between primary/secondary nodes
3296
    per_node_disks = {}
3297
    for instance in self.wanted_instances:
3298
      pnode = instance.primary_node
3299
      if pnode not in per_node_disks:
3300
        per_node_disks[pnode] = []
3301
      for idx, disk in enumerate(instance.disks):
3302
        per_node_disks[pnode].append((instance, idx, disk))
3303

    
3304
    assert not (frozenset(per_node_disks.keys()) -
3305
                self.owned_locks(locking.LEVEL_NODE_RES)), \
3306
      "Not owning correct locks"
3307
    assert not self.owned_locks(locking.LEVEL_NODE)
3308

    
3309
    changed = []
3310
    for node, dskl in per_node_disks.items():
3311
      newl = [v[2].Copy() for v in dskl]
3312
      for dsk in newl:
3313
        self.cfg.SetDiskID(dsk, node)
3314
      result = self.rpc.call_blockdev_getsize(node, newl)
3315
      if result.fail_msg:
3316
        self.LogWarning("Failure in blockdev_getsize call to node"
3317
                        " %s, ignoring", node)
3318
        continue
3319
      if len(result.payload) != len(dskl):
3320
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3321
                        " result.payload=%s", node, len(dskl), result.payload)
3322
        self.LogWarning("Invalid result from node %s, ignoring node results",
3323
                        node)
3324
        continue
3325
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3326
        if size is None:
3327
          self.LogWarning("Disk %d of instance %s did not return size"
3328
                          " information, ignoring", idx, instance.name)
3329
          continue
3330
        if not isinstance(size, (int, long)):
3331
          self.LogWarning("Disk %d of instance %s did not return valid"
3332
                          " size information, ignoring", idx, instance.name)
3333
          continue
3334
        size = size >> 20
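        # blockdev_getsize reports bytes; shift by 20 to get MiB, the unit in
        # which disk sizes are stored in the configuration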
3335
        if size != disk.size:
3336
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3337
                       " correcting: recorded %d, actual %d", idx,
3338
                       instance.name, disk.size, size)
3339
          disk.size = size
3340
          self.cfg.Update(instance, feedback_fn)
3341
          changed.append((instance.name, idx, size))
3342
        if self._EnsureChildSizes(disk):
3343
          self.cfg.Update(instance, feedback_fn)
3344
          changed.append((instance.name, idx, disk.size))
3345
    return changed


class LUClusterRename(LogicalUnit):
3349
  """Rename the cluster.
3350

3351
  """
3352
  HPATH = "cluster-rename"
3353
  HTYPE = constants.HTYPE_CLUSTER
3354

    
3355
  def BuildHooksEnv(self):
3356
    """Build hooks env.
3357

3358
    """
3359
    return {
3360
      "OP_TARGET": self.cfg.GetClusterName(),
3361
      "NEW_NAME": self.op.name,
3362
      }
3363

    
3364
  def BuildHooksNodes(self):
3365
    """Build hooks nodes.
3366

3367
    """
3368
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3369

    
3370
  def CheckPrereq(self):
3371
    """Verify that the passed name is a valid one.
3372

3373
    """
3374
    hostname = netutils.GetHostname(name=self.op.name,
3375
                                    family=self.cfg.GetPrimaryIPFamily())
3376

    
3377
    new_name = hostname.name
3378
    self.ip = new_ip = hostname.ip
3379
    old_name = self.cfg.GetClusterName()
3380
    old_ip = self.cfg.GetMasterIP()
3381
    if new_name == old_name and new_ip == old_ip:
3382
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3383
                                 " cluster has changed",
3384
                                 errors.ECODE_INVAL)
3385
    if new_ip != old_ip:
3386
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3387
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3388
                                   " reachable on the network" %
3389
                                   new_ip, errors.ECODE_NOTUNIQUE)
3390

    
3391
    self.op.name = new_name
3392

    
3393
  def Exec(self, feedback_fn):
3394
    """Rename the cluster.
3395

3396
    """
3397
    clustername = self.op.name
3398
    new_ip = self.ip
3399

    
3400
    # shutdown the master IP
3401
    master_params = self.cfg.GetMasterNetworkParameters()
3402
    ems = self.cfg.GetUseExternalMipScript()
3403
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3404
                                                     master_params, ems)
3405
    result.Raise("Could not disable the master role")
3406

    
3407
    try:
3408
      cluster = self.cfg.GetClusterInfo()
3409
      cluster.cluster_name = clustername
3410
      cluster.master_ip = new_ip
3411
      self.cfg.Update(cluster, feedback_fn)
3412

    
3413
      # update the known hosts file
3414
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3415
      node_list = self.cfg.GetOnlineNodeList()
3416
      try:
3417
        node_list.remove(master_params.name)
3418
      except ValueError:
3419
        pass
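      # The file was just rewritten locally on the master, so it only needs
      # to be pushed to the remaining online nodes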
3420
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3421
    finally:
3422
      master_params.ip = new_ip
3423
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3424
                                                     master_params, ems)
3425
      msg = result.fail_msg
3426
      if msg:
3427
        self.LogWarning("Could not re-enable the master role on"
3428
                        " the master, please restart manually: %s", msg)
3429

    
3430
    return clustername


def _ValidateNetmask(cfg, netmask):
3434
  """Checks if a netmask is valid.
3435

3436
  @type cfg: L{config.ConfigWriter}
3437
  @param cfg: The cluster configuration
3438
  @type netmask: int
3439
  @param netmask: the netmask to be verified
3440
  @raise errors.OpPrereqError: if the validation fails
3441

3442
  """
3443
  ip_family = cfg.GetPrimaryIPFamily()
3444
  try:
3445
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3446
  except errors.ProgrammerError:
3447
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
3448
                               ip_family)
3449
  if not ipcls.ValidateNetmask(netmask):
3450
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3451
                                (netmask))


class LUClusterSetParams(LogicalUnit):
3455
  """Change the parameters of the cluster.
3456

3457
  """
3458
  HPATH = "cluster-modify"
3459
  HTYPE = constants.HTYPE_CLUSTER
3460
  REQ_BGL = False
3461

    
3462
  def CheckArguments(self):
3463
    """Check parameters
3464

3465
    """
3466
    if self.op.uid_pool:
3467
      uidpool.CheckUidPool(self.op.uid_pool)
3468

    
3469
    if self.op.add_uids:
3470
      uidpool.CheckUidPool(self.op.add_uids)
3471

    
3472
    if self.op.remove_uids:
3473
      uidpool.CheckUidPool(self.op.remove_uids)
3474

    
3475
    if self.op.master_netmask is not None:
3476
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3477

    
3478
  def ExpandNames(self):
3479
    # FIXME: in the future maybe other cluster params won't require checking on
3480
    # all nodes to be modified.
3481
    self.needed_locks = {
3482
      locking.LEVEL_NODE: locking.ALL_SET,
3483
    }
3484
    self.share_locks[locking.LEVEL_NODE] = 1
3485

    
3486
  def BuildHooksEnv(self):
3487
    """Build hooks env.
3488

3489
    """
3490
    return {
3491
      "OP_TARGET": self.cfg.GetClusterName(),
3492
      "NEW_VG_NAME": self.op.vg_name,
3493
      }
3494

    
3495
  def BuildHooksNodes(self):
3496
    """Build hooks nodes.
3497

3498
    """
3499
    mn = self.cfg.GetMasterNode()
3500
    return ([mn], [mn])
3501

    
3502
  def CheckPrereq(self):
3503
    """Check prerequisites.
3504

3505
    This checks whether the given params don't conflict and
3506
    if the given volume group is valid.
3507

3508
    """
3509
    if self.op.vg_name is not None and not self.op.vg_name:
3510
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3511
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3512
                                   " instances exist", errors.ECODE_INVAL)
3513

    
3514
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3515
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3516
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3517
                                   " drbd-based instances exist",
3518
                                   errors.ECODE_INVAL)
3519

    
3520
    node_list = self.owned_locks(locking.LEVEL_NODE)
3521

    
3522
    # if vg_name not None, checks given volume group on all nodes
3523
    if self.op.vg_name:
3524
      vglist = self.rpc.call_vg_list(node_list)
3525
      for node in node_list:
3526
        msg = vglist[node].fail_msg
3527
        if msg:
3528
          # ignoring down node
3529
          self.LogWarning("Error while gathering data on node %s"
3530
                          " (ignoring node): %s", node, msg)
3531
          continue
3532
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3533
                                              self.op.vg_name,
3534
                                              constants.MIN_VG_SIZE)
3535
        if vgstatus:
3536
          raise errors.OpPrereqError("Error on node '%s': %s" %
3537
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3538

    
3539
    if self.op.drbd_helper:
3540
      # checks given drbd helper on all nodes
3541
      helpers = self.rpc.call_drbd_helper(node_list)
3542
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3543
        if ninfo.offline:
3544
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3545
          continue
3546
        msg = helpers[node].fail_msg
3547
        if msg:
3548
          raise errors.OpPrereqError("Error checking drbd helper on node"
3549
                                     " '%s': %s" % (node, msg),
3550
                                     errors.ECODE_ENVIRON)
3551
        node_helper = helpers[node].payload
3552
        if node_helper != self.op.drbd_helper:
3553
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3554
                                     (node, node_helper), errors.ECODE_ENVIRON)
3555

    
3556
    self.cluster = cluster = self.cfg.GetClusterInfo()
3557
    # validate params changes
3558
    if self.op.beparams:
3559
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3560
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3561

    
3562
    if self.op.ndparams:
3563
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3564
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3565

    
3566
      # TODO: we need a more general way to handle resetting
3567
      # cluster-level parameters to default values
3568
      if self.new_ndparams["oob_program"] == "":
3569
        self.new_ndparams["oob_program"] = \
3570
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3571

    
3572
    if self.op.nicparams:
3573
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3574
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3575
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3576
      nic_errors = []
3577

    
3578
      # check all instances for consistency
3579
      for instance in self.cfg.GetAllInstancesInfo().values():
3580
        for nic_idx, nic in enumerate(instance.nics):
3581
          params_copy = copy.deepcopy(nic.nicparams)
3582
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3583

    
3584
          # check parameter syntax
3585
          try:
3586
            objects.NIC.CheckParameterSyntax(params_filled)
3587
          except errors.ConfigurationError, err:
3588
            nic_errors.append("Instance %s, nic/%d: %s" %
3589
                              (instance.name, nic_idx, err))
3590

    
3591
          # if we're moving instances to routed, check that they have an ip
3592
          target_mode = params_filled[constants.NIC_MODE]
3593
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3594
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3595
                              " address" % (instance.name, nic_idx))
3596
      if nic_errors:
3597
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3598
                                   "\n".join(nic_errors))
3599

    
3600
    # hypervisor list/parameters
3601
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
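    # FillDict with an empty override returns a copy of the cluster-level
    # hvparams; the values from the opcode are merged into that copy below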
3602
    if self.op.hvparams:
3603
      for hv_name, hv_dict in self.op.hvparams.items():
3604
        if hv_name not in self.new_hvparams:
3605
          self.new_hvparams[hv_name] = hv_dict
3606
        else:
3607
          self.new_hvparams[hv_name].update(hv_dict)
3608

    
3609
    # os hypervisor parameters
3610
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3611
    if self.op.os_hvp:
3612
      for os_name, hvs in self.op.os_hvp.items():
3613
        if os_name not in self.new_os_hvp:
3614
          self.new_os_hvp[os_name] = hvs
3615
        else:
3616
          for hv_name, hv_dict in hvs.items():
3617
            if hv_name not in self.new_os_hvp[os_name]:
3618
              self.new_os_hvp[os_name][hv_name] = hv_dict
3619
            else:
3620
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3621

    
3622
    # os parameters
3623
    self.new_osp = objects.FillDict(cluster.osparams, {})
3624
    if self.op.osparams:
3625
      for os_name, osp in self.op.osparams.items():
3626
        if os_name not in self.new_osp:
3627
          self.new_osp[os_name] = {}
3628

    
3629
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3630
                                                  use_none=True)
3631

    
3632
        if not self.new_osp[os_name]:
3633
          # we removed all parameters
3634
          del self.new_osp[os_name]
3635
        else:
3636
          # check the parameter validity (remote check)
3637
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3638
                         os_name, self.new_osp[os_name])
3639

    
3640
    # changes to the hypervisor list
3641
    if self.op.enabled_hypervisors is not None:
3642
      self.hv_list = self.op.enabled_hypervisors
3643
      for hv in self.hv_list:
3644
        # if the hypervisor doesn't already exist in the cluster
3645
        # hvparams, we initialize it to empty, and then (in both
3646
        # cases) we make sure to fill the defaults, as we might not
3647
        # have a complete defaults list if the hypervisor wasn't
3648
        # enabled before
3649
        if hv not in new_hvp:
3650
          new_hvp[hv] = {}
3651
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3652
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3653
    else:
3654
      self.hv_list = cluster.enabled_hypervisors
3655

    
3656
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3657
      # either the enabled list has changed, or the parameters have, validate
3658
      for hv_name, hv_params in self.new_hvparams.items():
3659
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3660
            (self.op.enabled_hypervisors and
3661
             hv_name in self.op.enabled_hypervisors)):
3662
          # either this is a new hypervisor, or its parameters have changed
3663
          hv_class = hypervisor.GetHypervisor(hv_name)
3664
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3665
          hv_class.CheckParameterSyntax(hv_params)
3666
          _CheckHVParams(self, node_list, hv_name, hv_params)
3667

    
3668
    if self.op.os_hvp:
3669
      # no need to check any newly-enabled hypervisors, since the
3670
      # defaults have already been checked in the above code-block
3671
      for os_name, os_hvp in self.new_os_hvp.items():
3672
        for hv_name, hv_params in os_hvp.items():
3673
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3674
          # we need to fill in the new os_hvp on top of the actual hv_p
3675
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3676
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3677
          hv_class = hypervisor.GetHypervisor(hv_name)
3678
          hv_class.CheckParameterSyntax(new_osp)
3679
          _CheckHVParams(self, node_list, hv_name, new_osp)
3680

    
3681
    if self.op.default_iallocator:
3682
      alloc_script = utils.FindFile(self.op.default_iallocator,
3683
                                    constants.IALLOCATOR_SEARCH_PATH,
3684
                                    os.path.isfile)
3685
      if alloc_script is None:
3686
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3687
                                   " specified" % self.op.default_iallocator,
3688
                                   errors.ECODE_INVAL)
3689

    
3690
  def Exec(self, feedback_fn):
3691
    """Change the parameters of the cluster.
3692

3693
    """
3694
    if self.op.vg_name is not None:
3695
      new_volume = self.op.vg_name
3696
      if not new_volume:
3697
        new_volume = None
3698
      if new_volume != self.cfg.GetVGName():
3699
        self.cfg.SetVGName(new_volume)
3700
      else:
3701
        feedback_fn("Cluster LVM configuration already in desired"
3702
                    " state, not changing")
3703
    if self.op.drbd_helper is not None:
3704
      new_helper = self.op.drbd_helper
3705
      if not new_helper:
3706
        new_helper = None
3707
      if new_helper != self.cfg.GetDRBDHelper():
3708
        self.cfg.SetDRBDHelper(new_helper)
3709
      else:
3710
        feedback_fn("Cluster DRBD helper already in desired state,"
3711
                    " not changing")
3712
    if self.op.hvparams:
3713
      self.cluster.hvparams = self.new_hvparams
3714
    if self.op.os_hvp:
3715
      self.cluster.os_hvp = self.new_os_hvp
3716
    if self.op.enabled_hypervisors is not None:
3717
      self.cluster.hvparams = self.new_hvparams
3718
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3719
    if self.op.beparams:
3720
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3721
    if self.op.nicparams:
3722
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3723
    if self.op.osparams:
3724
      self.cluster.osparams = self.new_osp
3725
    if self.op.ndparams:
3726
      self.cluster.ndparams = self.new_ndparams
3727

    
3728
    if self.op.candidate_pool_size is not None:
3729
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3730
      # we need to update the pool size here, otherwise the save will fail
3731
      _AdjustCandidatePool(self, [])
3732

    
3733
    if self.op.maintain_node_health is not None:
3734
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3735
        feedback_fn("Note: CONFD was disabled at build time, node health"
3736
                    " maintenance is not useful (still enabling it)")
3737
      self.cluster.maintain_node_health = self.op.maintain_node_health
3738

    
3739
    if self.op.prealloc_wipe_disks is not None:
3740
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3741

    
3742
    if self.op.add_uids is not None:
3743
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3744

    
3745
    if self.op.remove_uids is not None:
3746
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3747

    
3748
    if self.op.uid_pool is not None:
3749
      self.cluster.uid_pool = self.op.uid_pool
3750

    
3751
    if self.op.default_iallocator is not None:
3752
      self.cluster.default_iallocator = self.op.default_iallocator
3753

    
3754
    if self.op.reserved_lvs is not None:
3755
      self.cluster.reserved_lvs = self.op.reserved_lvs
3756

    
3757
    if self.op.use_external_mip_script is not None:
3758
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3759

    
3760
    def helper_os(aname, mods, desc):
3761
      desc += " OS list"
3762
      lst = getattr(self.cluster, aname)
3763
      for key, val in mods:
3764
        if key == constants.DDM_ADD:
3765
          if val in lst:
3766
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3767
          else:
3768
            lst.append(val)
3769
        elif key == constants.DDM_REMOVE:
3770
          if val in lst:
3771
            lst.remove(val)
3772
          else:
3773
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3774
        else:
3775
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
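    # Expected "mods" format (OS names purely illustrative):
    #   [(constants.DDM_ADD, "my-os"), (constants.DDM_REMOVE, "old-os")]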
3776

    
3777
    if self.op.hidden_os:
3778
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3779

    
3780
    if self.op.blacklisted_os:
3781
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3782

    
3783
    if self.op.master_netdev:
3784
      master_params = self.cfg.GetMasterNetworkParameters()
3785
      ems = self.cfg.GetUseExternalMipScript()
3786
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3787
                  self.cluster.master_netdev)
3788
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3789
                                                       master_params, ems)
3790
      result.Raise("Could not disable the master ip")
3791
      feedback_fn("Changing master_netdev from %s to %s" %
3792
                  (master_params.netdev, self.op.master_netdev))
3793
      self.cluster.master_netdev = self.op.master_netdev
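      # The master IP is brought up again on the new netdev further down,
      # after the updated configuration has been written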
3794

    
3795
    if self.op.master_netmask:
3796
      master_params = self.cfg.GetMasterNetworkParameters()
3797
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3798
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3799
                                                        master_params.netmask,
3800
                                                        self.op.master_netmask,
3801
                                                        master_params.ip,
3802
                                                        master_params.netdev)
3803
      if result.fail_msg:
3804
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3805
        feedback_fn(msg)
3806

    
3807
      self.cluster.master_netmask = self.op.master_netmask
3808

    
3809
    self.cfg.Update(self.cluster, feedback_fn)
3810

    
3811
    if self.op.master_netdev:
3812
      master_params = self.cfg.GetMasterNetworkParameters()
3813
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3814
                  self.op.master_netdev)
3815
      ems = self.cfg.GetUseExternalMipScript()
3816
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3817
                                                     master_params, ems)
3818
      if result.fail_msg:
3819
        self.LogWarning("Could not re-enable the master ip on"
3820
                        " the master, please restart manually: %s",
3821
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
3825
  """Helper for uploading a file and showing warnings.
3826

3827
  """
3828
  if os.path.exists(fname):
3829
    result = lu.rpc.call_upload_file(nodes, fname)
3830
    for to_node, to_result in result.items():
3831
      msg = to_result.fail_msg
3832
      if msg:
3833
        msg = ("Copy of file %s to node %s failed: %s" %
3834
               (fname, to_node, msg))
3835
        lu.proc.LogWarning(msg)
3836

    
3837

    
3838
def _ComputeAncillaryFiles(cluster, redist):
3839
  """Compute files external to Ganeti which need to be consistent.
3840

3841
  @type redist: boolean
3842
  @param redist: Whether to include files which need to be redistributed
3843

3844
  """
3845
  # Compute files for all nodes
3846
  files_all = set([
3847
    constants.SSH_KNOWN_HOSTS_FILE,
3848
    constants.CONFD_HMAC_KEY,
3849
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3850
    constants.SPICE_CERT_FILE,
3851
    constants.SPICE_CACERT_FILE,
3852
    constants.RAPI_USERS_FILE,
3853
    ])
3854

    
3855
  if not redist:
3856
    files_all.update(constants.ALL_CERT_FILES)
3857
    files_all.update(ssconf.SimpleStore().GetFileList())
3858
  else:
3859
    # we need to ship at least the RAPI certificate
3860
    files_all.add(constants.RAPI_CERT_FILE)
3861

    
3862
  if cluster.modify_etc_hosts:
3863
    files_all.add(constants.ETC_HOSTS)
3864

    
3865
  # Files which are optional; these must:
3866
  # - be present in one other category as well
3867
  # - either exist or not exist on all nodes of that category (mc, vm all)
3868
  files_opt = set([
3869
    constants.RAPI_USERS_FILE,
3870
    ])
3871

    
3872
  # Files which should only be on master candidates
3873
  files_mc = set()
3874

    
3875
  if not redist:
3876
    files_mc.add(constants.CLUSTER_CONF_FILE)
3877

    
3878
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3879
    # replication
3880
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3881

    
3882
  # Files which should only be on VM-capable nodes
3883
  files_vm = set(filename
3884
    for hv_name in cluster.enabled_hypervisors
3885
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3886

    
3887
  files_opt |= set(filename
3888
    for hv_name in cluster.enabled_hypervisors
3889
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3890

    
3891
  # Filenames in each category must be unique
3892
  all_files_set = files_all | files_mc | files_vm
3893
  assert (len(all_files_set) ==
3894
          sum(map(len, [files_all, files_mc, files_vm]))), \
3895
         "Found file listed in more than one file list"
3896

    
3897
  # Optional files must be present in one other category
3898
  assert all_files_set.issuperset(files_opt), \
3899
         "Optional file not in a different required list"
3900

    
3901
  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3905
  """Distribute additional files which are part of the cluster configuration.
3906

3907
  ConfigWriter takes care of distributing the config and ssconf files, but
3908
  there are more files which should be distributed to all nodes. This function
3909
  makes sure those are copied.
3910

3911
  @param lu: calling logical unit
3912
  @param additional_nodes: list of nodes not in the config to distribute to
3913
  @type additional_vm: boolean
3914
  @param additional_vm: whether the additional nodes are vm-capable or not
3915

3916
  """
3917
  # Gather target nodes
3918
  cluster = lu.cfg.GetClusterInfo()
3919
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3920

    
3921
  online_nodes = lu.cfg.GetOnlineNodeList()
3922
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3923

    
3924
  if additional_nodes is not None:
3925
    online_nodes.extend(additional_nodes)
3926
    if additional_vm:
3927
      vm_nodes.extend(additional_nodes)
3928

    
3929
  # Never distribute to master node
3930
  for nodelist in [online_nodes, vm_nodes]:
3931
    if master_info.name in nodelist:
3932
      nodelist.remove(master_info.name)
3933

    
3934
  # Gather file lists
3935
  (files_all, _, files_mc, files_vm) = \
3936
    _ComputeAncillaryFiles(cluster, True)
3937

    
3938
  # Never re-distribute configuration file from here
3939
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3940
              constants.CLUSTER_CONF_FILE in files_vm)
3941
  assert not files_mc, "Master candidates not handled in this function"
3942

    
3943
  filemap = [
3944
    (online_nodes, files_all),
3945
    (vm_nodes, files_vm),
3946
    ]
3947

    
3948
  # Upload the files
3949
  for (node_list, files) in filemap:
3950
    for fname in files:
3951
      _UploadHelper(lu, node_list, fname)
3952

    
3953

    
3954
class LUClusterRedistConf(NoHooksLU):
3955
  """Force the redistribution of cluster configuration.
3956

3957
  This is a very simple LU.
3958

3959
  """
3960
  REQ_BGL = False
3961

    
3962
  def ExpandNames(self):
3963
    self.needed_locks = {
3964
      locking.LEVEL_NODE: locking.ALL_SET,
3965
    }
3966
    self.share_locks[locking.LEVEL_NODE] = 1
3967

    
3968
  def Exec(self, feedback_fn):
3969
    """Redistribute the configuration.
3970

3971
    """
3972
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3973
    _RedistributeAncillaryFiles(self)
3974

    
3975

    
3976
class LUClusterActivateMasterIp(NoHooksLU):
3977
  """Activate the master IP on the master node.
3978

3979
  """
3980
  def Exec(self, feedback_fn):
3981
    """Activate the master IP.
3982

3983
    """
3984
    master_params = self.cfg.GetMasterNetworkParameters()
3985
    ems = self.cfg.GetUseExternalMipScript()
3986
    result = self.rpc.call_node_activate_master_ip(master_params.name,
3987
                                                   master_params, ems)
3988
    result.Raise("Could not activate the master IP")
3989

    
3990

    
3991
class LUClusterDeactivateMasterIp(NoHooksLU):
3992
  """Deactivate the master IP on the master node.
3993

3994
  """
3995
  def Exec(self, feedback_fn):
3996
    """Deactivate the master IP.
3997

3998
    """
3999
    master_params = self.cfg.GetMasterNetworkParameters()
4000
    ems = self.cfg.GetUseExternalMipScript()
4001
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4002
                                                     master_params, ems)
4003
    result.Raise("Could not deactivate the master IP")
4004

    
4005

    
4006
def _WaitForSync(lu, instance, disks=None, oneshot=False):
4007
  """Sleep and poll for an instance's disk to sync.
4008

4009
  """
4010
  if not instance.disks or disks is not None and not disks:
4011
    return True
4012

    
4013
  disks = _ExpandCheckDisks(instance, disks)
4014

    
4015
  if not oneshot:
4016
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4017

    
4018
  node = instance.primary_node
4019

    
4020
  for dev in disks:
4021
    lu.cfg.SetDiskID(dev, node)
4022

    
4023
  # TODO: Convert to utils.Retry
4024

    
4025
  retries = 0
4026
  degr_retries = 10 # in seconds, as we sleep 1 second each time
4027
  while True:
4028
    max_time = 0
4029
    done = True
4030
    cumul_degraded = False
4031
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4032
    msg = rstats.fail_msg
4033
    if msg:
4034
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4035
      retries += 1
4036
      if retries >= 10:
4037
        raise errors.RemoteError("Can't contact node %s for mirror data,"
4038
                                 " aborting." % node)
4039
      time.sleep(6)
4040
      continue
4041
    rstats = rstats.payload
4042
    retries = 0
4043
    for i, mstat in enumerate(rstats):
4044
      if mstat is None:
4045
        lu.LogWarning("Can't compute data for node %s/%s",
4046
                           node, disks[i].iv_name)
4047
        continue
4048

    
4049
      cumul_degraded = (cumul_degraded or
4050
                        (mstat.is_degraded and mstat.sync_percent is None))
4051
      if mstat.sync_percent is not None:
4052
        done = False
4053
        if mstat.estimated_time is not None:
4054
          rem_time = ("%s remaining (estimated)" %
4055
                      utils.FormatSeconds(mstat.estimated_time))
4056
          max_time = mstat.estimated_time
4057
        else:
4058
          rem_time = "no time estimate"
4059
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4060
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4061

    
4062
    # if we're done but degraded, let's do a few small retries, to
4063
    # make sure we see a stable and not transient situation; therefore
4064
    # we force restart of the loop
4065
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4066
      logging.info("Degraded disks found, %d retries left", degr_retries)
4067
      degr_retries -= 1
4068
      time.sleep(1)
4069
      continue
4070

    
4071
    if done or oneshot:
4072
      break
4073

    
4074
    time.sleep(min(60, max_time))
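    # sleep for the estimated remaining sync time, but poll at least once
    # a minute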
4075

    
4076
  if done:
4077
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4078
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4082
  """Check that mirrors are not degraded.
4083

4084
  The ldisk parameter, if True, will change the test from the
4085
  is_degraded attribute (which represents overall non-ok status for
4086
  the device(s)) to the ldisk (representing the local storage status).
4087

4088
  """
4089
  lu.cfg.SetDiskID(dev, node)
4090

    
4091
  result = True
4092

    
4093
  if on_primary or dev.AssembleOnSecondary():
4094
    rstats = lu.rpc.call_blockdev_find(node, dev)
4095
    msg = rstats.fail_msg
4096
    if msg:
4097
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4098
      result = False
4099
    elif not rstats.payload:
4100
      lu.LogWarning("Can't find disk on node %s", node)
4101
      result = False
4102
    else:
4103
      if ldisk:
4104
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4105
      else:
4106
        result = result and not rstats.payload.is_degraded
4107

    
4108
  if dev.children:
4109
    for child in dev.children:
4110
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4111

    
4112
  return result


class LUOobCommand(NoHooksLU):
4116
  """Logical unit for OOB handling.
4117

4118
  """
4119
  REQ_BGL = False
4120
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret
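
  # Illustrative sketch only (hypothetical values): each entry of the list
  # returned above has the form
  #   [(constants.RS_NORMAL, "<node name>"), (<status>, <payload or None>)]
  # so a successful "power-status" query for one node could look like
  #   [(constants.RS_NORMAL, "node1.example.com"),
  #    (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})]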

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))
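
  # Summary of the payload shapes accepted by _CheckPayload above,
  # illustrative only:
  #   health              -> list of (item, status) pairs, status in
  #                          constants.OOB_STATUSES
  #   power-status        -> dict keyed by OOB_POWER_STATUS_POWERED
  #   power-on/off/cycle  -> no payload (None)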


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
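
  # For illustration only: if none of "hidden", "blacklisted" or "valid" are
  # among the requested fields, the status part built above is
  #   [qlang.OP_AND,
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #    [qlang.OP_TRUE, "valid"]]
  # and, when OS names were given, it is AND-ed with the name filter.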

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
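
  # Illustrative only (hypothetical values): with output_fields
  # ["node", "vg", "name", "instance"], a single row of the result could be
  #   ["node1.example.com", "xenvg", "disk0", "instance1.example.com"]
  # since every value is converted with str() above.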


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We're adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    # Since modifying a node can have severe effects on currently running
    # operations the resource lock is at least acquired in shared mode
    self.needed_locks[locking.LEVEL_NODE_RES] = \
      self.needed_locks[locking.LEVEL_NODE]

    # Get node resource and instance locks in shared mode; they are not used
    # for anything but read-only access
    self.share_locks[locking.LEVEL_NODE_RES] = 1
    self.share_locks[locking.LEVEL_INSTANCE] = 1

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
                              msg="cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
5608

    
5609

    
5610
class LUNodePowercycle(NoHooksLU):
5611
  """Powercycles a node.
5612

5613
  """
5614
  REQ_BGL = False
5615

    
5616
  def CheckArguments(self):
5617
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5618
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5619
      raise errors.OpPrereqError("The node is the master and the force"
5620
                                 " parameter was not set",
5621
                                 errors.ECODE_INVAL)
5622

    
5623
  def ExpandNames(self):
5624
    """Locking for PowercycleNode.
5625

5626
    This is a last-resort option and shouldn't block on other
5627
    jobs. Therefore, we grab no locks.
5628

5629
    """
5630
    self.needed_locks = {}
5631

    
5632
  def Exec(self, feedback_fn):
5633
    """Reboots a node.
5634

5635
    """
5636
    result = self.rpc.call_node_powercycle(self.op.node_name,
5637
                                           self.cfg.GetHypervisorType())
5638
    result.Raise("Failed to schedule the reboot")
5639
    return result.payload
5640

    
5641

    
5642
class LUClusterQuery(NoHooksLU):
5643
  """Query cluster configuration.
5644

5645
  """
5646
  REQ_BGL = False
5647

    
5648
  def ExpandNames(self):
5649
    self.needed_locks = {}
5650

    
5651
  def Exec(self, feedback_fn):
5652
    """Return cluster config.
5653

5654
    """
5655
    cluster = self.cfg.GetClusterInfo()
5656
    os_hvp = {}
5657

    
5658
    # Filter just for enabled hypervisors
5659
    for os_name, hv_dict in cluster.os_hvp.items():
5660
      os_hvp[os_name] = {}
5661
      for hv_name, hv_params in hv_dict.items():
5662
        if hv_name in cluster.enabled_hypervisors:
5663
          os_hvp[os_name][hv_name] = hv_params
5664

    
5665
    # Convert ip_family to ip_version
5666
    primary_ip_version = constants.IP4_VERSION
5667
    if cluster.primary_ip_family == netutils.IP6Address.family:
5668
      primary_ip_version = constants.IP6_VERSION
5669

    
5670
    result = {
5671
      "software_version": constants.RELEASE_VERSION,
5672
      "protocol_version": constants.PROTOCOL_VERSION,
5673
      "config_version": constants.CONFIG_VERSION,
5674
      "os_api_version": max(constants.OS_API_VERSIONS),
5675
      "export_version": constants.EXPORT_VERSION,
5676
      "architecture": (platform.architecture()[0], platform.machine()),
5677
      "name": cluster.cluster_name,
5678
      "master": cluster.master_node,
5679
      "default_hypervisor": cluster.enabled_hypervisors[0],
5680
      "enabled_hypervisors": cluster.enabled_hypervisors,
5681
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5682
                        for hypervisor_name in cluster.enabled_hypervisors]),
5683
      "os_hvp": os_hvp,
5684
      "beparams": cluster.beparams,
5685
      "osparams": cluster.osparams,
5686
      "nicparams": cluster.nicparams,
5687
      "ndparams": cluster.ndparams,
5688
      "candidate_pool_size": cluster.candidate_pool_size,
5689
      "master_netdev": cluster.master_netdev,
5690
      "master_netmask": cluster.master_netmask,
5691
      "use_external_mip_script": cluster.use_external_mip_script,
5692
      "volume_group_name": cluster.volume_group_name,
5693
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5694
      "file_storage_dir": cluster.file_storage_dir,
5695
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5696
      "maintain_node_health": cluster.maintain_node_health,
5697
      "ctime": cluster.ctime,
5698
      "mtime": cluster.mtime,
5699
      "uuid": cluster.uuid,
5700
      "tags": list(cluster.GetTags()),
5701
      "uid_pool": cluster.uid_pool,
5702
      "default_iallocator": cluster.default_iallocator,
5703
      "reserved_lvs": cluster.reserved_lvs,
5704
      "primary_ip_version": primary_ip_version,
5705
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5706
      "hidden_os": cluster.hidden_os,
5707
      "blacklisted_os": cluster.blacklisted_os,
5708
      }
5709

    
5710
    return result
5711

    
5712

    
5713
class LUClusterConfigQuery(NoHooksLU):
5714
  """Return configuration values.
5715

5716
  """
5717
  REQ_BGL = False
5718
  _FIELDS_DYNAMIC = utils.FieldSet()
5719
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5720
                                  "watcher_pause", "volume_group_name")
5721

    
5722
  def CheckArguments(self):
5723
    _CheckOutputFields(static=self._FIELDS_STATIC,
5724
                       dynamic=self._FIELDS_DYNAMIC,
5725
                       selected=self.op.output_fields)
5726

    
5727
  def ExpandNames(self):
5728
    self.needed_locks = {}
5729

    
5730
  def Exec(self, feedback_fn):
5731
    """Dump a representation of the cluster config to the standard output.
5732

5733
    """
5734
    values = []
5735
    for field in self.op.output_fields:
5736
      if field == "cluster_name":
5737
        entry = self.cfg.GetClusterName()
5738
      elif field == "master_node":
5739
        entry = self.cfg.GetMasterNode()
5740
      elif field == "drain_flag":
5741
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5742
      elif field == "watcher_pause":
5743
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5744
      elif field == "volume_group_name":
5745
        entry = self.cfg.GetVGName()
5746
      else:
5747
        raise errors.ParameterError(field)
5748
      values.append(entry)
5749
    return values
5750
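# Illustrative note (added, not original code): with
# op.output_fields = ["cluster_name", "master_node"] the Exec() method above
# would return something like ["cluster.example.com", "node1.example.com"],
# i.e. one entry per requested field, in the order the fields were given.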

    
5751

    
5752
class LUInstanceActivateDisks(NoHooksLU):
5753
  """Bring up an instance's disks.
5754

5755
  """
5756
  REQ_BGL = False
5757

    
5758
  def ExpandNames(self):
5759
    self._ExpandAndLockInstance()
5760
    self.needed_locks[locking.LEVEL_NODE] = []
5761
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5762

    
5763
  def DeclareLocks(self, level):
5764
    if level == locking.LEVEL_NODE:
5765
      self._LockInstancesNodes()
5766

    
5767
  def CheckPrereq(self):
5768
    """Check prerequisites.
5769

5770
    This checks that the instance is in the cluster.
5771

5772
    """
5773
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5774
    assert self.instance is not None, \
5775
      "Cannot retrieve locked instance %s" % self.op.instance_name
5776
    _CheckNodeOnline(self, self.instance.primary_node)
5777

    
5778
  def Exec(self, feedback_fn):
5779
    """Activate the disks.
5780

5781
    """
5782
    disks_ok, disks_info = \
5783
              _AssembleInstanceDisks(self, self.instance,
5784
                                     ignore_size=self.op.ignore_size)
5785
    if not disks_ok:
5786
      raise errors.OpExecError("Cannot activate block devices")
5787

    
5788
    return disks_info
5789

    
5790

    
5791
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5792
                           ignore_size=False):
5793
  """Prepare the block devices for an instance.
5794

5795
  This sets up the block devices on all nodes.
5796

5797
  @type lu: L{LogicalUnit}
5798
  @param lu: the logical unit on whose behalf we execute
5799
  @type instance: L{objects.Instance}
5800
  @param instance: the instance for whose disks we assemble
5801
  @type disks: list of L{objects.Disk} or None
5802
  @param disks: which disks to assemble (or all, if None)
5803
  @type ignore_secondaries: boolean
5804
  @param ignore_secondaries: if true, errors on secondary nodes
5805
      won't result in an error return from the function
5806
  @type ignore_size: boolean
5807
  @param ignore_size: if true, the current known size of the disk
5808
      will not be used during the disk activation, useful for cases
5809
      when the size is wrong
5810
  @return: a tuple of (disks_ok, device_info), where device_info is a
5811
      list of (host, instance_visible_name, node_visible_name) tuples
5812
      with the mapping from node devices to instance devices
5813

5814
  """
5815
  device_info = []
5816
  disks_ok = True
5817
  iname = instance.name
5818
  disks = _ExpandCheckDisks(instance, disks)
5819

    
5820
  # With the two passes mechanism we try to reduce the window of
5821
  # opportunity for the race condition of switching DRBD to primary
5822
  # before handshaking occurred, but we do not eliminate it
5823

    
5824
  # The proper fix would be to wait (with some limits) until the
5825
  # connection has been made and drbd transitions from WFConnection
5826
  # into any other network-connected state (Connected, SyncTarget,
5827
  # SyncSource, etc.)
5828

    
5829
  # 1st pass, assemble on all nodes in secondary mode
5830
  for idx, inst_disk in enumerate(disks):
5831
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5832
      if ignore_size:
5833
        node_disk = node_disk.Copy()
5834
        node_disk.UnsetSize()
5835
      lu.cfg.SetDiskID(node_disk, node)
5836
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5837
      msg = result.fail_msg
5838
      if msg:
5839
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5840
                           " (is_primary=False, pass=1): %s",
5841
                           inst_disk.iv_name, node, msg)
5842
        if not ignore_secondaries:
5843
          disks_ok = False
5844

    
5845
  # FIXME: race condition on drbd migration to primary
5846

    
5847
  # 2nd pass, do only the primary node
5848
  for idx, inst_disk in enumerate(disks):
5849
    dev_path = None
5850

    
5851
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5852
      if node != instance.primary_node:
5853
        continue
5854
      if ignore_size:
5855
        node_disk = node_disk.Copy()
5856
        node_disk.UnsetSize()
5857
      lu.cfg.SetDiskID(node_disk, node)
5858
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5859
      msg = result.fail_msg
5860
      if msg:
5861
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5862
                           " (is_primary=True, pass=2): %s",
5863
                           inst_disk.iv_name, node, msg)
5864
        disks_ok = False
5865
      else:
5866
        dev_path = result.payload
5867

    
5868
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5869

    
5870
  # leave the disks configured for the primary node
5871
  # this is a workaround that would be fixed better by
5872
  # improving the logical/physical id handling
5873
  for disk in disks:
5874
    lu.cfg.SetDiskID(disk, instance.primary_node)
5875

    
5876
  return disks_ok, device_info
5877
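# Illustrative sketch (added, not original code): how a logical unit can use
# _AssembleInstanceDisks and its return value; the name _ExampleActivateDisks
# and the "lu"/"instance" arguments are hypothetical, mirroring the call in
# LUInstanceActivateDisks.Exec above.
def _ExampleActivateDisks(lu, instance):
  disks_ok, disks_info = _AssembleInstanceDisks(lu, instance,
                                                ignore_size=False)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  # disks_info holds (node, iv_name, device_path) tuples for the primary node
  return disks_info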

    
5878

    
5879
def _StartInstanceDisks(lu, instance, force):
5880
  """Start the disks of an instance.
5881

5882
  """
5883
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5884
                                           ignore_secondaries=force)
5885
  if not disks_ok:
5886
    _ShutdownInstanceDisks(lu, instance)
5887
    if force is not None and not force:
5888
      lu.proc.LogWarning("", hint="If the message above refers to a"
5889
                         " secondary node,"
5890
                         " you can retry the operation using '--force'.")
5891
    raise errors.OpExecError("Disk consistency error")
5892
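# Illustrative sketch (added, not original code): starting an instance's disks
# while tolerating secondary-node errors, as LUInstanceReboot does further
# below; the function name and arguments are hypothetical.
def _ExampleStartDisksForced(lu, instance):
  # force=True makes _AssembleInstanceDisks ignore errors on secondary nodes;
  # on failure the disks are shut down again and OpExecError is raised.
  _StartInstanceDisks(lu, instance, True)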

    
5893

    
5894
class LUInstanceDeactivateDisks(NoHooksLU):
5895
  """Shutdown an instance's disks.
5896

5897
  """
5898
  REQ_BGL = False
5899

    
5900
  def ExpandNames(self):
5901
    self._ExpandAndLockInstance()
5902
    self.needed_locks[locking.LEVEL_NODE] = []
5903
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5904

    
5905
  def DeclareLocks(self, level):
5906
    if level == locking.LEVEL_NODE:
5907
      self._LockInstancesNodes()
5908

    
5909
  def CheckPrereq(self):
5910
    """Check prerequisites.
5911

5912
    This checks that the instance is in the cluster.
5913

5914
    """
5915
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5916
    assert self.instance is not None, \
5917
      "Cannot retrieve locked instance %s" % self.op.instance_name
5918

    
5919
  def Exec(self, feedback_fn):
5920
    """Deactivate the disks
5921

5922
    """
5923
    instance = self.instance
5924
    if self.op.force:
5925
      _ShutdownInstanceDisks(self, instance)
5926
    else:
5927
      _SafeShutdownInstanceDisks(self, instance)
5928

    
5929

    
5930
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5931
  """Shutdown block devices of an instance.
5932

5933
  This function checks that the instance is down before calling
5934
  _ShutdownInstanceDisks.
5935

5936
  """
5937
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5938
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5939

    
5940

    
5941
def _ExpandCheckDisks(instance, disks):
5942
  """Return the instance disks selected by the disks list
5943

5944
  @type disks: list of L{objects.Disk} or None
5945
  @param disks: selected disks
5946
  @rtype: list of L{objects.Disk}
5947
  @return: selected instance disks to act on
5948

5949
  """
5950
  if disks is None:
5951
    return instance.disks
5952
  else:
5953
    if not set(disks).issubset(instance.disks):
5954
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5955
                                   " target instance")
5956
    return disks
5957

    
5958

    
5959
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5960
  """Shutdown block devices of an instance.
5961

5962
  This does the shutdown on all nodes of the instance.
5963

5964
  If ignore_primary is false, errors on the primary node make the
5965
  shutdown fail; if it is true, they are ignored.
5966

5967
  """
5968
  all_result = True
5969
  disks = _ExpandCheckDisks(instance, disks)
5970

    
5971
  for disk in disks:
5972
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5973
      lu.cfg.SetDiskID(top_disk, node)
5974
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5975
      msg = result.fail_msg
5976
      if msg:
5977
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5978
                      disk.iv_name, node, msg)
5979
        if ((node == instance.primary_node and not ignore_primary) or
5980
            (node != instance.primary_node and not result.offline)):
5981
          all_result = False
5982
  return all_result
5983
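# Illustrative sketch (added, not original code): shutting down only a subset
# of an instance's disks while tolerating errors on the primary node; the
# function name and arguments are hypothetical.
def _ExampleShutdownFirstDisk(lu, instance):
  # _ExpandCheckDisks validates that the selected disks belong to the instance
  first_disk = _ExpandCheckDisks(instance, instance.disks[:1])
  return _ShutdownInstanceDisks(lu, instance, disks=first_disk,
                                ignore_primary=True)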

    
5984

    
5985
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5986
  """Checks if a node has enough free memory.
5987

5988
  This function checks if a given node has the needed amount of free
5989
  memory. In case the node has less memory or we cannot get the
5990
  information from the node, this function raises an OpPrereqError
5991
  exception.
5992

5993
  @type lu: C{LogicalUnit}
5994
  @param lu: a logical unit from which we get configuration data
5995
  @type node: C{str}
5996
  @param node: the node to check
5997
  @type reason: C{str}
5998
  @param reason: string to use in the error message
5999
  @type requested: C{int}
6000
  @param requested: the amount of memory in MiB to check for
6001
  @type hypervisor_name: C{str}
6002
  @param hypervisor_name: the hypervisor to ask for memory stats
6003
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6004
      we cannot check the node
6005

6006
  """
6007
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
6008
  nodeinfo[node].Raise("Can't get data from node %s" % node,
6009
                       prereq=True, ecode=errors.ECODE_ENVIRON)
6010
  free_mem = nodeinfo[node].payload.get("memory_free", None)
6011
  if not isinstance(free_mem, int):
6012
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6013
                               " was '%s'" % (node, free_mem),
6014
                               errors.ECODE_ENVIRON)
6015
  if requested > free_mem:
6016
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6017
                               " needed %s MiB, available %s MiB" %
6018
                               (node, reason, requested, free_mem),
6019
                               errors.ECODE_NORES)
6020
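# Illustrative sketch (added, not original code): verifying free memory before
# starting an instance, mirroring the call in LUInstanceStartup.CheckPrereq
# below; the function name is hypothetical.
def _ExampleCheckStartupMemory(lu, instance):
  bep = lu.cfg.GetClusterInfo().FillBE(instance)
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       bep[constants.BE_MEMORY], instance.hypervisor)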

    
6021

    
6022
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6023
  """Checks if nodes have enough free disk space in the all VGs.
6024

6025
  This function checks if all given nodes have the needed amount of
6026
  free disk. In case any node has less disk or we cannot get the
6027
  information from the node, this function raises an OpPrereqError
6028
  exception.
6029

6030
  @type lu: C{LogicalUnit}
6031
  @param lu: a logical unit from which we get configuration data
6032
  @type nodenames: C{list}
6033
  @param nodenames: the list of node names to check
6034
  @type req_sizes: C{dict}
6035
  @param req_sizes: the hash of vg and corresponding amount of disk in
6036
      MiB to check for
6037
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6038
      or we cannot check the node
6039

6040
  """
6041
  for vg, req_size in req_sizes.items():
6042
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6043

    
6044

    
6045
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6046
  """Checks if nodes have enough free disk space in the specified VG.
6047

6048
  This function checks if all given nodes have the needed amount of
6049
  free disk. In case any node has less disk or we cannot get the
6050
  information from the node, this function raises an OpPrereqError
6051
  exception.
6052

6053
  @type lu: C{LogicalUnit}
6054
  @param lu: a logical unit from which we get configuration data
6055
  @type nodenames: C{list}
6056
  @param nodenames: the list of node names to check
6057
  @type vg: C{str}
6058
  @param vg: the volume group to check
6059
  @type requested: C{int}
6060
  @param requested: the amount of disk in MiB to check for
6061
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6062
      or we cannot check the node
6063

6064
  """
6065
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6066
  for node in nodenames:
6067
    info = nodeinfo[node]
6068
    info.Raise("Cannot get current information from node %s" % node,
6069
               prereq=True, ecode=errors.ECODE_ENVIRON)
6070
    vg_free = info.payload.get("vg_free", None)
6071
    if not isinstance(vg_free, int):
6072
      raise errors.OpPrereqError("Can't compute free disk space on node"
6073
                                 " %s for vg %s, result was '%s'" %
6074
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6075
    if requested > vg_free:
6076
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6077
                                 " vg %s: required %d MiB, available %d MiB" %
6078
                                 (node, vg, requested, vg_free),
6079
                                 errors.ECODE_NORES)
6080

    
6081

    
6082
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6083
  """Checks if nodes have enough physical CPUs
6084

6085
  This function checks if all given nodes have the needed number of
6086
  physical CPUs. In case any node has fewer CPUs or we cannot get the
6087
  information from the node, this function raises an OpPrereqError
6088
  exception.
6089

6090
  @type lu: C{LogicalUnit}
6091
  @param lu: a logical unit from which we get configuration data
6092
  @type nodenames: C{list}
6093
  @param nodenames: the list of node names to check
6094
  @type requested: C{int}
6095
  @param requested: the minimum acceptable number of physical CPUs
6096
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6097
      or we cannot check the node
6098

6099
  """
6100
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6101
  for node in nodenames:
6102
    info = nodeinfo[node]
6103
    info.Raise("Cannot get current information from node %s" % node,
6104
               prereq=True, ecode=errors.ECODE_ENVIRON)
6105
    num_cpus = info.payload.get("cpu_total", None)
6106
    if not isinstance(num_cpus, int):
6107
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6108
                                 " on node %s, result was '%s'" %
6109
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6110
    if requested > num_cpus:
6111
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6112
                                 "required" % (node, num_cpus, requested),
6113
                                 errors.ECODE_NORES)
6114
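# Illustrative sketch (added, not original code): requiring at least four
# physical CPUs on each candidate node, using the cluster's hypervisor; the
# function name and the CPU count are hypothetical.
def _ExampleCheckCpus(lu, nodenames):
  _CheckNodesPhysicalCPUs(lu, nodenames, 4, lu.cfg.GetHypervisorType())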

    
6115

    
6116
class LUInstanceStartup(LogicalUnit):
6117
  """Starts an instance.
6118

6119
  """
6120
  HPATH = "instance-start"
6121
  HTYPE = constants.HTYPE_INSTANCE
6122
  REQ_BGL = False
6123

    
6124
  def CheckArguments(self):
6125
    # extra beparams
6126
    if self.op.beparams:
6127
      # fill the beparams dict
6128
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6129

    
6130
  def ExpandNames(self):
6131
    self._ExpandAndLockInstance()
6132

    
6133
  def BuildHooksEnv(self):
6134
    """Build hooks env.
6135

6136
    This runs on master, primary and secondary nodes of the instance.
6137

6138
    """
6139
    env = {
6140
      "FORCE": self.op.force,
6141
      }
6142

    
6143
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6144

    
6145
    return env
6146

    
6147
  def BuildHooksNodes(self):
6148
    """Build hooks nodes.
6149

6150
    """
6151
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6152
    return (nl, nl)
6153

    
6154
  def CheckPrereq(self):
6155
    """Check prerequisites.
6156

6157
    This checks that the instance is in the cluster.
6158

6159
    """
6160
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6161
    assert self.instance is not None, \
6162
      "Cannot retrieve locked instance %s" % self.op.instance_name
6163

    
6164
    # extra hvparams
6165
    if self.op.hvparams:
6166
      # check hypervisor parameter syntax (locally)
6167
      cluster = self.cfg.GetClusterInfo()
6168
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6169
      filled_hvp = cluster.FillHV(instance)
6170
      filled_hvp.update(self.op.hvparams)
6171
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6172
      hv_type.CheckParameterSyntax(filled_hvp)
6173
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6174

    
6175
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6176

    
6177
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6178

    
6179
    if self.primary_offline and self.op.ignore_offline_nodes:
6180
      self.proc.LogWarning("Ignoring offline primary node")
6181

    
6182
      if self.op.hvparams or self.op.beparams:
6183
        self.proc.LogWarning("Overridden parameters are ignored")
6184
    else:
6185
      _CheckNodeOnline(self, instance.primary_node)
6186

    
6187
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6188

    
6189
      # check bridges existence
6190
      _CheckInstanceBridgesExist(self, instance)
6191

    
6192
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6193
                                                instance.name,
6194
                                                instance.hypervisor)
6195
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6196
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6197
      if not remote_info.payload: # not running already
6198
        _CheckNodeFreeMemory(self, instance.primary_node,
6199
                             "starting instance %s" % instance.name,
6200
                             bep[constants.BE_MEMORY], instance.hypervisor)
6201

    
6202
  def Exec(self, feedback_fn):
6203
    """Start the instance.
6204

6205
    """
6206
    instance = self.instance
6207
    force = self.op.force
6208

    
6209
    if not self.op.no_remember:
6210
      self.cfg.MarkInstanceUp(instance.name)
6211

    
6212
    if self.primary_offline:
6213
      assert self.op.ignore_offline_nodes
6214
      self.proc.LogInfo("Primary node offline, marked instance as started")
6215
    else:
6216
      node_current = instance.primary_node
6217

    
6218
      _StartInstanceDisks(self, instance, force)
6219

    
6220
      result = \
6221
        self.rpc.call_instance_start(node_current,
6222
                                     (instance, self.op.hvparams,
6223
                                      self.op.beparams),
6224
                                     self.op.startup_paused)
6225
      msg = result.fail_msg
6226
      if msg:
6227
        _ShutdownInstanceDisks(self, instance)
6228
        raise errors.OpExecError("Could not start instance: %s" % msg)
6229

    
6230

    
6231
class LUInstanceReboot(LogicalUnit):
6232
  """Reboot an instance.
6233

6234
  """
6235
  HPATH = "instance-reboot"
6236
  HTYPE = constants.HTYPE_INSTANCE
6237
  REQ_BGL = False
6238

    
6239
  def ExpandNames(self):
6240
    self._ExpandAndLockInstance()
6241

    
6242
  def BuildHooksEnv(self):
6243
    """Build hooks env.
6244

6245
    This runs on master, primary and secondary nodes of the instance.
6246

6247
    """
6248
    env = {
6249
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6250
      "REBOOT_TYPE": self.op.reboot_type,
6251
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6252
      }
6253

    
6254
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6255

    
6256
    return env
6257

    
6258
  def BuildHooksNodes(self):
6259
    """Build hooks nodes.
6260

6261
    """
6262
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6263
    return (nl, nl)
6264

    
6265
  def CheckPrereq(self):
6266
    """Check prerequisites.
6267

6268
    This checks that the instance is in the cluster.
6269

6270
    """
6271
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6272
    assert self.instance is not None, \
6273
      "Cannot retrieve locked instance %s" % self.op.instance_name
6274
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6275
    _CheckNodeOnline(self, instance.primary_node)
6276

    
6277
    # check bridges existence
6278
    _CheckInstanceBridgesExist(self, instance)
6279

    
6280
  def Exec(self, feedback_fn):
6281
    """Reboot the instance.
6282

6283
    """
6284
    instance = self.instance
6285
    ignore_secondaries = self.op.ignore_secondaries
6286
    reboot_type = self.op.reboot_type
6287

    
6288
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6289
                                              instance.name,
6290
                                              instance.hypervisor)
6291
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6292
    instance_running = bool(remote_info.payload)
6293

    
6294
    node_current = instance.primary_node
6295

    
6296
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6297
                                            constants.INSTANCE_REBOOT_HARD]:
6298
      for disk in instance.disks:
6299
        self.cfg.SetDiskID(disk, node_current)
6300
      result = self.rpc.call_instance_reboot(node_current, instance,
6301
                                             reboot_type,
6302
                                             self.op.shutdown_timeout)
6303
      result.Raise("Could not reboot instance")
6304
    else:
6305
      if instance_running:
6306
        result = self.rpc.call_instance_shutdown(node_current, instance,
6307
                                                 self.op.shutdown_timeout)
6308
        result.Raise("Could not shutdown instance for full reboot")
6309
        _ShutdownInstanceDisks(self, instance)
6310
      else:
6311
        self.LogInfo("Instance %s was already stopped, starting now",
6312
                     instance.name)
6313
      _StartInstanceDisks(self, instance, ignore_secondaries)
6314
      result = self.rpc.call_instance_start(node_current,
6315
                                            (instance, None, None), False)
6316
      msg = result.fail_msg
6317
      if msg:
6318
        _ShutdownInstanceDisks(self, instance)
6319
        raise errors.OpExecError("Could not start instance for"
6320
                                 " full reboot: %s" % msg)
6321

    
6322
    self.cfg.MarkInstanceUp(instance.name)
6323

    
6324

    
6325
class LUInstanceShutdown(LogicalUnit):
6326
  """Shutdown an instance.
6327

6328
  """
6329
  HPATH = "instance-stop"
6330
  HTYPE = constants.HTYPE_INSTANCE
6331
  REQ_BGL = False
6332

    
6333
  def ExpandNames(self):
6334
    self._ExpandAndLockInstance()
6335

    
6336
  def BuildHooksEnv(self):
6337
    """Build hooks env.
6338

6339
    This runs on master, primary and secondary nodes of the instance.
6340

6341
    """
6342
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6343
    env["TIMEOUT"] = self.op.timeout
6344
    return env
6345

    
6346
  def BuildHooksNodes(self):
6347
    """Build hooks nodes.
6348

6349
    """
6350
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6351
    return (nl, nl)
6352

    
6353
  def CheckPrereq(self):
6354
    """Check prerequisites.
6355

6356
    This checks that the instance is in the cluster.
6357

6358
    """
6359
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6360
    assert self.instance is not None, \
6361
      "Cannot retrieve locked instance %s" % self.op.instance_name
6362

    
6363
    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6364

    
6365
    self.primary_offline = \
6366
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6367

    
6368
    if self.primary_offline and self.op.ignore_offline_nodes:
6369
      self.proc.LogWarning("Ignoring offline primary node")
6370
    else:
6371
      _CheckNodeOnline(self, self.instance.primary_node)
6372

    
6373
  def Exec(self, feedback_fn):
6374
    """Shutdown the instance.
6375

6376
    """
6377
    instance = self.instance
6378
    node_current = instance.primary_node
6379
    timeout = self.op.timeout
6380

    
6381
    if not self.op.no_remember:
6382
      self.cfg.MarkInstanceDown(instance.name)
6383

    
6384
    if self.primary_offline:
6385
      assert self.op.ignore_offline_nodes
6386
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6387
    else:
6388
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6389
      msg = result.fail_msg
6390
      if msg:
6391
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6392

    
6393
      _ShutdownInstanceDisks(self, instance)
6394

    
6395

    
6396
class LUInstanceReinstall(LogicalUnit):
6397
  """Reinstall an instance.
6398

6399
  """
6400
  HPATH = "instance-reinstall"
6401
  HTYPE = constants.HTYPE_INSTANCE
6402
  REQ_BGL = False
6403

    
6404
  def ExpandNames(self):
6405
    self._ExpandAndLockInstance()
6406

    
6407
  def BuildHooksEnv(self):
6408
    """Build hooks env.
6409

6410
    This runs on master, primary and secondary nodes of the instance.
6411

6412
    """
6413
    return _BuildInstanceHookEnvByObject(self, self.instance)
6414

    
6415
  def BuildHooksNodes(self):
6416
    """Build hooks nodes.
6417

6418
    """
6419
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6420
    return (nl, nl)
6421

    
6422
  def CheckPrereq(self):
6423
    """Check prerequisites.
6424

6425
    This checks that the instance is in the cluster and is not running.
6426

6427
    """
6428
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6429
    assert instance is not None, \
6430
      "Cannot retrieve locked instance %s" % self.op.instance_name
6431
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6432
                     " offline, cannot reinstall")
6433
    for node in instance.secondary_nodes:
6434
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6435
                       " cannot reinstall")
6436

    
6437
    if instance.disk_template == constants.DT_DISKLESS:
6438
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6439
                                 self.op.instance_name,
6440
                                 errors.ECODE_INVAL)
6441
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6442

    
6443
    if self.op.os_type is not None:
6444
      # OS verification
6445
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6446
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6447
      instance_os = self.op.os_type
6448
    else:
6449
      instance_os = instance.os
6450

    
6451
    nodelist = list(instance.all_nodes)
6452

    
6453
    if self.op.osparams:
6454
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6455
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6456
      self.os_inst = i_osdict # the new dict (without defaults)
6457
    else:
6458
      self.os_inst = None
6459

    
6460
    self.instance = instance
6461

    
6462
  def Exec(self, feedback_fn):
6463
    """Reinstall the instance.
6464

6465
    """
6466
    inst = self.instance
6467

    
6468
    if self.op.os_type is not None:
6469
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6470
      inst.os = self.op.os_type
6471
      # Write to configuration
6472
      self.cfg.Update(inst, feedback_fn)
6473

    
6474
    _StartInstanceDisks(self, inst, None)
6475
    try:
6476
      feedback_fn("Running the instance OS create scripts...")
6477
      # FIXME: pass debug option from opcode to backend
6478
      result = self.rpc.call_instance_os_add(inst.primary_node,
6479
                                             (inst, self.os_inst), True,
6480
                                             self.op.debug_level)
6481
      result.Raise("Could not install OS for instance %s on node %s" %
6482
                   (inst.name, inst.primary_node))
6483
    finally:
6484
      _ShutdownInstanceDisks(self, inst)
6485

    
6486

    
6487
class LUInstanceRecreateDisks(LogicalUnit):
6488
  """Recreate an instance's missing disks.
6489

6490
  """
6491
  HPATH = "instance-recreate-disks"
6492
  HTYPE = constants.HTYPE_INSTANCE
6493
  REQ_BGL = False
6494

    
6495
  def CheckArguments(self):
6496
    # normalise the disk list
6497
    self.op.disks = sorted(frozenset(self.op.disks))
6498

    
6499
  def ExpandNames(self):
6500
    self._ExpandAndLockInstance()
6501
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6502
    if self.op.nodes:
6503
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6504
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6505
    else:
6506
      self.needed_locks[locking.LEVEL_NODE] = []
6507

    
6508
  def DeclareLocks(self, level):
6509
    if level == locking.LEVEL_NODE:
6510
      # if we replace the nodes, we only need to lock the old primary,
6511
      # otherwise we need to lock all nodes for disk re-creation
6512
      primary_only = bool(self.op.nodes)
6513
      self._LockInstancesNodes(primary_only=primary_only)
6514
    elif level == locking.LEVEL_NODE_RES:
6515
      # Copy node locks
6516
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6517
        self.needed_locks[locking.LEVEL_NODE][:]
6518

    
6519
  def BuildHooksEnv(self):
6520
    """Build hooks env.
6521

6522
    This runs on master, primary and secondary nodes of the instance.
6523

6524
    """
6525
    return _BuildInstanceHookEnvByObject(self, self.instance)
6526

    
6527
  def BuildHooksNodes(self):
6528
    """Build hooks nodes.
6529

6530
    """
6531
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6532
    return (nl, nl)
6533

    
6534
  def CheckPrereq(self):
6535
    """Check prerequisites.
6536

6537
    This checks that the instance is in the cluster and is not running.
6538

6539
    """
6540
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6541
    assert instance is not None, \
6542
      "Cannot retrieve locked instance %s" % self.op.instance_name
6543
    if self.op.nodes:
6544
      if len(self.op.nodes) != len(instance.all_nodes):
6545
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6546
                                   " %d replacement nodes were specified" %
6547
                                   (instance.name, len(instance.all_nodes),
6548
                                    len(self.op.nodes)),
6549
                                   errors.ECODE_INVAL)
6550
      assert instance.disk_template != constants.DT_DRBD8 or \
6551
          len(self.op.nodes) == 2
6552
      assert instance.disk_template != constants.DT_PLAIN or \
6553
          len(self.op.nodes) == 1
6554
      primary_node = self.op.nodes[0]
6555
    else:
6556
      primary_node = instance.primary_node
6557
    _CheckNodeOnline(self, primary_node)
6558

    
6559
    if instance.disk_template == constants.DT_DISKLESS:
6560
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6561
                                 self.op.instance_name, errors.ECODE_INVAL)
6562
    # if we replace nodes *and* the old primary is offline, we don't
6563
    # check
6564
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6565
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6566
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6567
    if not (self.op.nodes and old_pnode.offline):
6568
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6569
                          msg="cannot recreate disks")
6570

    
6571
    if not self.op.disks:
6572
      self.op.disks = range(len(instance.disks))
6573
    else:
6574
      for idx in self.op.disks:
6575
        if idx >= len(instance.disks):
6576
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6577
                                     errors.ECODE_INVAL)
6578
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6579
      raise errors.OpPrereqError("Can't recreate disks partially and"
6580
                                 " change the nodes at the same time",
6581
                                 errors.ECODE_INVAL)
6582
    self.instance = instance
6583

    
6584
  def Exec(self, feedback_fn):
6585
    """Recreate the disks.
6586

6587
    """
6588
    instance = self.instance
6589

    
6590
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6591
            self.owned_locks(locking.LEVEL_NODE_RES))
6592

    
6593
    to_skip = []
6594
    mods = [] # keeps track of needed logical_id changes
6595

    
6596
    for idx, disk in enumerate(instance.disks):
6597
      if idx not in self.op.disks: # disk idx has not been passed in
6598
        to_skip.append(idx)
6599
        continue
6600
      # update secondaries for disks, if needed
6601
      if self.op.nodes:
6602
        if disk.dev_type == constants.LD_DRBD8:
6603
          # need to update the nodes and minors
6604
          assert len(self.op.nodes) == 2
6605
          assert len(disk.logical_id) == 6 # otherwise disk internals
6606
                                           # have changed
6607
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6608
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6609
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6610
                    new_minors[0], new_minors[1], old_secret)
6611
          assert len(disk.logical_id) == len(new_id)
6612
          mods.append((idx, new_id))
6613

    
6614
    # now that we have passed all asserts above, we can apply the mods
6615
    # in a single run (to avoid partial changes)
6616
    for idx, new_id in mods:
6617
      instance.disks[idx].logical_id = new_id
6618

    
6619
    # change primary node, if needed
6620
    if self.op.nodes:
6621
      instance.primary_node = self.op.nodes[0]
6622
      self.LogWarning("Changing the instance's nodes, you will have to"
6623
                      " remove any disks left on the older nodes manually")
6624

    
6625
    if self.op.nodes:
6626
      self.cfg.Update(instance, feedback_fn)
6627

    
6628
    _CreateDisks(self, instance, to_skip=to_skip)
6629

    
6630

    
6631
class LUInstanceRename(LogicalUnit):
6632
  """Rename an instance.
6633

6634
  """
6635
  HPATH = "instance-rename"
6636
  HTYPE = constants.HTYPE_INSTANCE
6637

    
6638
  def CheckArguments(self):
6639
    """Check arguments.
6640

6641
    """
6642
    if self.op.ip_check and not self.op.name_check:
6643
      # TODO: make the ip check more flexible and not depend on the name check
6644
      raise errors.OpPrereqError("IP address check requires a name check",
6645
                                 errors.ECODE_INVAL)
6646

    
6647
  def BuildHooksEnv(self):
6648
    """Build hooks env.
6649

6650
    This runs on master, primary and secondary nodes of the instance.
6651

6652
    """
6653
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6654
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6655
    return env
6656

    
6657
  def BuildHooksNodes(self):
6658
    """Build hooks nodes.
6659

6660
    """
6661
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6662
    return (nl, nl)
6663

    
6664
  def CheckPrereq(self):
6665
    """Check prerequisites.
6666

6667
    This checks that the instance is in the cluster and is not running.
6668

6669
    """
6670
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6671
                                                self.op.instance_name)
6672
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6673
    assert instance is not None
6674
    _CheckNodeOnline(self, instance.primary_node)
6675
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6676
                        msg="cannot rename")
6677
    self.instance = instance
6678

    
6679
    new_name = self.op.new_name
6680
    if self.op.name_check:
6681
      hostname = netutils.GetHostname(name=new_name)
6682
      if hostname.name != new_name:
6683
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6684
                     hostname.name)
6685
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6686
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6687
                                    " same as given hostname '%s'") %
6688
                                    (hostname.name, self.op.new_name),
6689
                                    errors.ECODE_INVAL)
6690
      new_name = self.op.new_name = hostname.name
6691
      if (self.op.ip_check and
6692
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6693
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6694
                                   (hostname.ip, new_name),
6695
                                   errors.ECODE_NOTUNIQUE)
6696

    
6697
    instance_list = self.cfg.GetInstanceList()
6698
    if new_name in instance_list and new_name != instance.name:
6699
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6700
                                 new_name, errors.ECODE_EXISTS)
6701

    
6702
  def Exec(self, feedback_fn):
6703
    """Rename the instance.
6704

6705
    """
6706
    inst = self.instance
6707
    old_name = inst.name
6708

    
6709
    rename_file_storage = False
6710
    if (inst.disk_template in constants.DTS_FILEBASED and
6711
        self.op.new_name != inst.name):
6712
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6713
      rename_file_storage = True
6714

    
6715
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6716
    # Change the instance lock. This is definitely safe while we hold the BGL.
6717
    # Otherwise the new lock would have to be added in acquired mode.
6718
    assert self.REQ_BGL
6719
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6720
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6721

    
6722
    # re-read the instance from the configuration after rename
6723
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6724

    
6725
    if rename_file_storage:
6726
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6727
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6728
                                                     old_file_storage_dir,
6729
                                                     new_file_storage_dir)
6730
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6731
                   " (but the instance has been renamed in Ganeti)" %
6732
                   (inst.primary_node, old_file_storage_dir,
6733
                    new_file_storage_dir))
6734

    
6735
    _StartInstanceDisks(self, inst, None)
6736
    try:
6737
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6738
                                                 old_name, self.op.debug_level)
6739
      msg = result.fail_msg
6740
      if msg:
6741
        msg = ("Could not run OS rename script for instance %s on node %s"
6742
               " (but the instance has been renamed in Ganeti): %s" %
6743
               (inst.name, inst.primary_node, msg))
6744
        self.proc.LogWarning(msg)
6745
    finally:
6746
      _ShutdownInstanceDisks(self, inst)
6747

    
6748
    return inst.name
6749

    
6750

    
6751
class LUInstanceRemove(LogicalUnit):
6752
  """Remove an instance.
6753

6754
  """
6755
  HPATH = "instance-remove"
6756
  HTYPE = constants.HTYPE_INSTANCE
6757
  REQ_BGL = False
6758

    
6759
  def ExpandNames(self):
6760
    self._ExpandAndLockInstance()
6761
    self.needed_locks[locking.LEVEL_NODE] = []
6762
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6763
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6764

    
6765
  def DeclareLocks(self, level):
6766
    if level == locking.LEVEL_NODE:
6767
      self._LockInstancesNodes()
6768
    elif level == locking.LEVEL_NODE_RES:
6769
      # Copy node locks
6770
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6771
        self.needed_locks[locking.LEVEL_NODE][:]
6772

    
6773
  def BuildHooksEnv(self):
6774
    """Build hooks env.
6775

6776
    This runs on master, primary and secondary nodes of the instance.
6777

6778
    """
6779
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6780
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6781
    return env
6782

    
6783
  def BuildHooksNodes(self):
6784
    """Build hooks nodes.
6785

6786
    """
6787
    nl = [self.cfg.GetMasterNode()]
6788
    nl_post = list(self.instance.all_nodes) + nl
6789
    return (nl, nl_post)
6790

    
6791
  def CheckPrereq(self):
6792
    """Check prerequisites.
6793

6794
    This checks that the instance is in the cluster.
6795

6796
    """
6797
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6798
    assert self.instance is not None, \
6799
      "Cannot retrieve locked instance %s" % self.op.instance_name
6800

    
6801
  def Exec(self, feedback_fn):
6802
    """Remove the instance.
6803

6804
    """
6805
    instance = self.instance
6806
    logging.info("Shutting down instance %s on node %s",
6807
                 instance.name, instance.primary_node)
6808

    
6809
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6810
                                             self.op.shutdown_timeout)
6811
    msg = result.fail_msg
6812
    if msg:
6813
      if self.op.ignore_failures:
6814
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6815
      else:
6816
        raise errors.OpExecError("Could not shutdown instance %s on"
6817
                                 " node %s: %s" %
6818
                                 (instance.name, instance.primary_node, msg))
6819

    
6820
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6821
            self.owned_locks(locking.LEVEL_NODE_RES))
6822
    assert not (set(instance.all_nodes) -
6823
                self.owned_locks(locking.LEVEL_NODE)), \
6824
      "Not owning correct locks"
6825

    
6826
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6827

    
6828

    
6829
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6830
  """Utility function to remove an instance.
6831

6832
  """
6833
  logging.info("Removing block devices for instance %s", instance.name)
6834

    
6835
  if not _RemoveDisks(lu, instance):
6836
    if not ignore_failures:
6837
      raise errors.OpExecError("Can't remove instance's disks")
6838
    feedback_fn("Warning: can't remove instance's disks")
6839

    
6840
  logging.info("Removing instance %s out of cluster config", instance.name)
6841

    
6842
  lu.cfg.RemoveInstance(instance.name)
6843

    
6844
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6845
    "Instance lock removal conflict"
6846

    
6847
  # Remove lock for the instance
6848
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6849

    
6850

    
6851
class LUInstanceQuery(NoHooksLU):
6852
  """Logical unit for querying instances.
6853

6854
  """
6855
  # pylint: disable=W0142
6856
  REQ_BGL = False
6857

    
6858
  def CheckArguments(self):
6859
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6860
                             self.op.output_fields, self.op.use_locking)
6861

    
6862
  def ExpandNames(self):
6863
    self.iq.ExpandNames(self)
6864

    
6865
  def DeclareLocks(self, level):
6866
    self.iq.DeclareLocks(self, level)
6867

    
6868
  def Exec(self, feedback_fn):
6869
    return self.iq.OldStyleQuery(self)
6870

    
6871

    
6872
class LUInstanceFailover(LogicalUnit):
6873
  """Failover an instance.
6874

6875
  """
6876
  HPATH = "instance-failover"
6877
  HTYPE = constants.HTYPE_INSTANCE
6878
  REQ_BGL = False
6879

    
6880
  def CheckArguments(self):
6881
    """Check the arguments.
6882

6883
    """
6884
    self.iallocator = getattr(self.op, "iallocator", None)
6885
    self.target_node = getattr(self.op, "target_node", None)
6886

    
6887
  def ExpandNames(self):
6888
    self._ExpandAndLockInstance()
6889

    
6890
    if self.op.target_node is not None:
6891
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6892

    
6893
    self.needed_locks[locking.LEVEL_NODE] = []
6894
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6895

    
6896
    ignore_consistency = self.op.ignore_consistency
6897
    shutdown_timeout = self.op.shutdown_timeout
6898
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6899
                                       cleanup=False,
6900
                                       failover=True,
6901
                                       ignore_consistency=ignore_consistency,
6902
                                       shutdown_timeout=shutdown_timeout)
6903
    self.tasklets = [self._migrater]
6904

    
6905
  def DeclareLocks(self, level):
6906
    if level == locking.LEVEL_NODE:
6907
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6908
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6909
        if self.op.target_node is None:
6910
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6911
        else:
6912
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6913
                                                   self.op.target_node]
6914
        del self.recalculate_locks[locking.LEVEL_NODE]
6915
      else:
6916
        self._LockInstancesNodes()
6917

    
6918
  def BuildHooksEnv(self):
6919
    """Build hooks env.
6920

6921
    This runs on master, primary and secondary nodes of the instance.
6922

6923
    """
6924
    instance = self._migrater.instance
6925
    source_node = instance.primary_node
6926
    target_node = self.op.target_node
6927
    env = {
6928
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6929
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6930
      "OLD_PRIMARY": source_node,
6931
      "NEW_PRIMARY": target_node,
6932
      }
6933

    
6934
    if instance.disk_template in constants.DTS_INT_MIRROR:
6935
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6936
      env["NEW_SECONDARY"] = source_node
6937
    else:
6938
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6939

    
6940
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6941

    
6942
    return env
6943

    
6944
  def BuildHooksNodes(self):
6945
    """Build hooks nodes.
6946

6947
    """
6948
    instance = self._migrater.instance
6949
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6950
    return (nl, nl + [instance.primary_node])
6951

    
6952

    
6953
class LUInstanceMigrate(LogicalUnit):
6954
  """Migrate an instance.
6955

6956
  This is migration without shutting down, compared to the failover,
6957
  which is done with shutdown.
6958

6959
  """
6960
  HPATH = "instance-migrate"
6961
  HTYPE = constants.HTYPE_INSTANCE
6962
  REQ_BGL = False
6963

    
6964
  def ExpandNames(self):
6965
    self._ExpandAndLockInstance()
6966

    
6967
    if self.op.target_node is not None:
6968
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6969

    
6970
    self.needed_locks[locking.LEVEL_NODE] = []
6971
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6972

    
6973
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6974
                                       cleanup=self.op.cleanup,
6975
                                       failover=False,
6976
                                       fallback=self.op.allow_failover)
6977
    self.tasklets = [self._migrater]
6978

    
6979
  def DeclareLocks(self, level):
6980
    if level == locking.LEVEL_NODE:
6981
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6982
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6983
        if self.op.target_node is None:
6984
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6985
        else:
6986
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6987
                                                   self.op.target_node]
6988
        del self.recalculate_locks[locking.LEVEL_NODE]
6989
      else:
6990
        self._LockInstancesNodes()
6991

    
6992
  def BuildHooksEnv(self):
6993
    """Build hooks env.
6994

6995
    This runs on master, primary and secondary nodes of the instance.
6996

6997
    """
6998
    instance = self._migrater.instance
6999
    source_node = instance.primary_node
7000
    target_node = self.op.target_node
7001
    env = _BuildInstanceHookEnvByObject(self, instance)
7002
    env.update({
7003
      "MIGRATE_LIVE": self._migrater.live,
7004
      "MIGRATE_CLEANUP": self.op.cleanup,
7005
      "OLD_PRIMARY": source_node,
7006
      "NEW_PRIMARY": target_node,
7007
      })
7008

    
7009
    if instance.disk_template in constants.DTS_INT_MIRROR:
7010
      env["OLD_SECONDARY"] = target_node
7011
      env["NEW_SECONDARY"] = source_node
7012
    else:
7013
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
7014

    
7015
    return env
7016

    
7017
  def BuildHooksNodes(self):
7018
    """Build hooks nodes.
7019

7020
    """
7021
    instance = self._migrater.instance
7022
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
7023
    return (nl, nl + [instance.primary_node])
7024

    
7025

    
7026
class LUInstanceMove(LogicalUnit):
7027
  """Move an instance by data-copying.
7028

7029
  """
7030
  HPATH = "instance-move"
7031
  HTYPE = constants.HTYPE_INSTANCE
7032
  REQ_BGL = False
7033

    
7034
  def ExpandNames(self):
7035
    self._ExpandAndLockInstance()
7036
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7037
    self.op.target_node = target_node
7038
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
7039
    self.needed_locks[locking.LEVEL_NODE_RES] = []
7040
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7041

    
7042
  def DeclareLocks(self, level):
7043
    if level == locking.LEVEL_NODE:
7044
      self._LockInstancesNodes(primary_only=True)
7045
    elif level == locking.LEVEL_NODE_RES:
7046
      # Copy node locks
7047
      self.needed_locks[locking.LEVEL_NODE_RES] = \
7048
        self.needed_locks[locking.LEVEL_NODE][:]
7049

    
7050
  def BuildHooksEnv(self):
7051
    """Build hooks env.
7052

7053
    This runs on master, primary and secondary nodes of the instance.
7054

7055
    """
7056
    env = {
7057
      "TARGET_NODE": self.op.target_node,
7058
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7059
      }
7060
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7061
    return env
7062

    
7063
  def BuildHooksNodes(self):
7064
    """Build hooks nodes.
7065

7066
    """
7067
    nl = [
7068
      self.cfg.GetMasterNode(),
7069
      self.instance.primary_node,
7070
      self.op.target_node,
7071
      ]
7072
    return (nl, nl)
7073

    
7074
  def CheckPrereq(self):
7075
    """Check prerequisites.
7076

7077
    This checks that the instance is in the cluster.
7078

7079
    """
7080
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7081
    assert self.instance is not None, \
7082
      "Cannot retrieve locked instance %s" % self.op.instance_name
7083

    
7084
    node = self.cfg.GetNodeInfo(self.op.target_node)
7085
    assert node is not None, \
7086
      "Cannot retrieve locked node %s" % self.op.target_node
7087

    
7088
    self.target_node = target_node = node.name
7089

    
7090
    if target_node == instance.primary_node:
7091
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7092
                                 (instance.name, target_node),
7093
                                 errors.ECODE_STATE)
7094

    
7095
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7096

    
7097
    for idx, dsk in enumerate(instance.disks):
7098
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7099
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7100
                                   " cannot copy" % idx, errors.ECODE_STATE)
7101

    
7102
    _CheckNodeOnline(self, target_node)
7103
    _CheckNodeNotDrained(self, target_node)
7104
    _CheckNodeVmCapable(self, target_node)
7105

    
7106
    if instance.admin_state == constants.ADMINST_UP:
7107
      # check memory requirements on the target node
7108
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7109
                           instance.name, bep[constants.BE_MEMORY],
7110
                           instance.hypervisor)
7111
    else:
7112
      self.LogInfo("Not checking memory on the secondary node as"
7113
                   " instance will not be started")
7114

    
7115
    # check bridge existence
7116
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7117

    
7118
  def Exec(self, feedback_fn):
7119
    """Move an instance.
7120

7121
    The move is done by shutting it down on its present node, copying
7122
    the data over (slow) and starting it on the new node.
7123

7124
    """
7125
    instance = self.instance
7126

    
7127
    source_node = instance.primary_node
7128
    target_node = self.target_node
7129

    
7130
    self.LogInfo("Shutting down instance %s on source node %s",
7131
                 instance.name, source_node)
7132

    
7133
    assert (self.owned_locks(locking.LEVEL_NODE) ==
7134
            self.owned_locks(locking.LEVEL_NODE_RES))
7135

    
7136
    result = self.rpc.call_instance_shutdown(source_node, instance,
7137
                                             self.op.shutdown_timeout)
7138
    msg = result.fail_msg
7139
    if msg:
7140
      if self.op.ignore_consistency:
7141
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7142
                             " Proceeding anyway. Please make sure node"
7143
                             " %s is down. Error details: %s",
7144
                             instance.name, source_node, source_node, msg)
7145
      else:
7146
        raise errors.OpExecError("Could not shutdown instance %s on"
7147
                                 " node %s: %s" %
7148
                                 (instance.name, source_node, msg))
7149

    
7150
    # create the target disks
7151
    try:
7152
      _CreateDisks(self, instance, target_node=target_node)
7153
    except errors.OpExecError:
7154
      self.LogWarning("Device creation failed, reverting...")
7155
      try:
7156
        _RemoveDisks(self, instance, target_node=target_node)
7157
      finally:
7158
        self.cfg.ReleaseDRBDMinors(instance.name)
7159
        raise
7160

    
7161
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7162

    
7163
    errs = []
7164
    # activate, get path, copy the data over
7165
    for idx, disk in enumerate(instance.disks):
7166
      self.LogInfo("Copying data for disk %d", idx)
7167
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7168
                                               instance.name, True, idx)
7169
      if result.fail_msg:
7170
        self.LogWarning("Can't assemble newly created disk %d: %s",
7171
                        idx, result.fail_msg)
7172
        errs.append(result.fail_msg)
7173
        break
7174
      dev_path = result.payload
7175
      result = self.rpc.call_blockdev_export(source_node, disk,
7176
                                             target_node, dev_path,
7177
                                             cluster_name)
7178
      if result.fail_msg:
7179
        self.LogWarning("Can't copy data over for disk %d: %s",
7180
                        idx, result.fail_msg)
7181
        errs.append(result.fail_msg)
7182
        break
7183

    
7184
    if errs:
7185
      self.LogWarning("Some disks failed to copy, aborting")
7186
      try:
7187
        _RemoveDisks(self, instance, target_node=target_node)
7188
      finally:
7189
        self.cfg.ReleaseDRBDMinors(instance.name)
7190
        raise errors.OpExecError("Errors during disk copy: %s" %
7191
                                 (",".join(errs),))
7192

    
7193
    instance.primary_node = target_node
7194
    self.cfg.Update(instance, feedback_fn)
7195

    
7196
    self.LogInfo("Removing the disks on the original node")
7197
    _RemoveDisks(self, instance, target_node=source_node)
7198

    
7199
    # Only start the instance if it's marked as up
7200
    if instance.admin_state == constants.ADMINST_UP:
7201
      self.LogInfo("Starting instance %s on node %s",
7202
                   instance.name, target_node)
7203

    
7204
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7205
                                           ignore_secondaries=True)
7206
      if not disks_ok:
7207
        _ShutdownInstanceDisks(self, instance)
7208
        raise errors.OpExecError("Can't activate the instance's disks")
7209

    
7210
      result = self.rpc.call_instance_start(target_node,
7211
                                            (instance, None, None), False)
7212
      msg = result.fail_msg
7213
      if msg:
7214
        _ShutdownInstanceDisks(self, instance)
7215
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7216
                                 (instance.name, target_node, msg))
7217

    
7218

    
7219
class LUNodeMigrate(LogicalUnit):
7220
  """Migrate all instances from a node.
7221

7222
  """
7223
  HPATH = "node-migrate"
7224
  HTYPE = constants.HTYPE_NODE
7225
  REQ_BGL = False
7226

    
7227
  def CheckArguments(self):
7228
    pass
7229

    
7230
  def ExpandNames(self):
7231
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7232

    
7233
    self.share_locks = _ShareAll()
7234
    self.needed_locks = {
7235
      locking.LEVEL_NODE: [self.op.node_name],
7236
      }
7237

    
7238
  def BuildHooksEnv(self):
7239
    """Build hooks env.
7240

7241
    This runs on the master, the primary and all the secondaries.
7242

7243
    """
7244
    return {
7245
      "NODE_NAME": self.op.node_name,
7246
      }
7247

    
7248
  def BuildHooksNodes(self):
7249
    """Build hooks nodes.
7250

7251
    """
7252
    nl = [self.cfg.GetMasterNode()]
7253
    return (nl, nl)
7254

    
7255
  def CheckPrereq(self):
7256
    pass
7257

    
7258
  def Exec(self, feedback_fn):
7259
    # Prepare jobs for migration instances
7260
    jobs = [
7261
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7262
                                 mode=self.op.mode,
7263
                                 live=self.op.live,
7264
                                 iallocator=self.op.iallocator,
7265
                                 target_node=self.op.target_node)]
7266
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7267
      ]
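    # Illustrative note (not part of the original code): each entry in "jobs"
    # is an independent single-opcode job, so a node whose primary instances
    # are, say, "inst1" and "inst2" yields
    #   [ [OpInstanceMigrate(instance_name="inst1", ...)],
    #     [OpInstanceMigrate(instance_name="inst2", ...)] ]
    # and the whole list is wrapped in the ResultWithJobs returned below.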
7268

    
7269
    # TODO: Run iallocator in this opcode and pass correct placement options to
7270
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7271
    # running the iallocator and the actual migration, a good consistency model
7272
    # will have to be found.
7273

    
7274
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7275
            frozenset([self.op.node_name]))
7276

    
7277
    return ResultWithJobs(jobs)
7278

    
7279

    
7280
class TLMigrateInstance(Tasklet):
7281
  """Tasklet class for instance migration.
7282

7283
  @type live: boolean
7284
  @ivar live: whether the migration will be done live or non-live;
7285
      this variable is initialized only after CheckPrereq has run
7286
  @type cleanup: boolean
7287
  @ivar cleanup: Whether we are cleaning up after a failed migration
7288
  @type iallocator: string
7289
  @ivar iallocator: The iallocator used to determine target_node
7290
  @type target_node: string
7291
  @ivar target_node: If given, the target_node to reallocate the instance to
7292
  @type failover: boolean
7293
  @ivar failover: Whether operation results in failover or migration
7294
  @type fallback: boolean
7295
  @ivar fallback: Whether fallback to failover is allowed if migration not
7296
                  possible
7297
  @type ignore_consistency: boolean
7298
  @ivar ignore_consistency: Whether we should ignore consistency between
                            source and target node
7300
  @type shutdown_timeout: int
7301
  @ivar shutdown_timeout: In case of failover, the shutdown timeout to use
7302

7303
  """
7304

    
7305
  # Constants
7306
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7307
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7308

    
7309
  def __init__(self, lu, instance_name, cleanup=False,
7310
               failover=False, fallback=False,
7311
               ignore_consistency=False,
7312
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7313
    """Initializes this class.
7314

7315
    """
7316
    Tasklet.__init__(self, lu)
7317

    
7318
    # Parameters
7319
    self.instance_name = instance_name
7320
    self.cleanup = cleanup
7321
    self.live = False # will be overridden later
7322
    self.failover = failover
7323
    self.fallback = fallback
7324
    self.ignore_consistency = ignore_consistency
7325
    self.shutdown_timeout = shutdown_timeout
7326

    
7327
  def CheckPrereq(self):
7328
    """Check prerequisites.
7329

7330
    This checks that the instance is in the cluster.
7331

7332
    """
7333
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7334
    instance = self.cfg.GetInstanceInfo(instance_name)
7335
    assert instance is not None
7336
    self.instance = instance
7337

    
7338
    if (not self.cleanup and
7339
        not instance.admin_state == constants.ADMINST_UP and
7340
        not self.failover and self.fallback):
7341
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7342
                      " switching to failover")
7343
      self.failover = True
7344

    
7345
    if instance.disk_template not in constants.DTS_MIRRORED:
7346
      if self.failover:
7347
        text = "failovers"
7348
      else:
7349
        text = "migrations"
7350
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7351
                                 " %s" % (instance.disk_template, text),
7352
                                 errors.ECODE_STATE)
7353

    
7354
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7355
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7356

    
7357
      if self.lu.op.iallocator:
7358
        self._RunAllocator()
7359
      else:
7360
        # We set self.target_node as it is required by
        # BuildHooksEnv
7362
        self.target_node = self.lu.op.target_node
7363

    
7364
      # self.target_node is already populated, either directly or by the
7365
      # iallocator run
7366
      target_node = self.target_node
7367
      if self.target_node == instance.primary_node:
7368
        raise errors.OpPrereqError("Cannot migrate instance %s"
7369
                                   " to its primary (%s)" %
7370
                                   (instance.name, instance.primary_node))
7371

    
7372
      if len(self.lu.tasklets) == 1:
7373
        # It is safe to release locks only when we're the only tasklet
7374
        # in the LU
7375
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7376
                      keep=[instance.primary_node, self.target_node])
7377

    
7378
    else:
7379
      secondary_nodes = instance.secondary_nodes
7380
      if not secondary_nodes:
7381
        raise errors.ConfigurationError("No secondary node but using"
7382
                                        " %s disk template" %
7383
                                        instance.disk_template)
7384
      target_node = secondary_nodes[0]
7385
      if self.lu.op.iallocator or (self.lu.op.target_node and
7386
                                   self.lu.op.target_node != target_node):
7387
        if self.failover:
7388
          text = "failed over"
7389
        else:
7390
          text = "migrated"
7391
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7392
                                   " be %s to arbitrary nodes"
7393
                                   " (neither an iallocator nor a target"
7394
                                   " node can be passed)" %
7395
                                   (instance.disk_template, text),
7396
                                   errors.ECODE_INVAL)
7397

    
7398
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7399

    
7400
    # check memory requirements on the secondary node
7401
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
7402
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7403
                           instance.name, i_be[constants.BE_MEMORY],
7404
                           instance.hypervisor)
7405
    else:
7406
      self.lu.LogInfo("Not checking memory on the secondary node as"
7407
                      " instance will not be started")
7408

    
7409
    # check bridge existence
7410
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7411

    
7412
    if not self.cleanup:
7413
      _CheckNodeNotDrained(self.lu, target_node)
7414
      if not self.failover:
7415
        result = self.rpc.call_instance_migratable(instance.primary_node,
7416
                                                   instance)
7417
        if result.fail_msg and self.fallback:
7418
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7419
                          " failover")
7420
          self.failover = True
7421
        else:
7422
          result.Raise("Can't migrate, please use failover",
7423
                       prereq=True, ecode=errors.ECODE_STATE)
7424

    
7425
    assert not (self.failover and self.cleanup)
7426

    
7427
    if not self.failover:
7428
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7429
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7430
                                   " parameters are accepted",
7431
                                   errors.ECODE_INVAL)
7432
      if self.lu.op.live is not None:
7433
        if self.lu.op.live:
7434
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7435
        else:
7436
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7437
        # reset the 'live' parameter to None so that repeated
7438
        # invocations of CheckPrereq do not raise an exception
7439
        self.lu.op.live = None
7440
      elif self.lu.op.mode is None:
7441
        # read the default value from the hypervisor
7442
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7443
                                                skip_globals=False)
7444
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7445

    
7446
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7447
    else:
7448
      # Failover is never live
7449
      self.live = False
7450

    
7451
  def _RunAllocator(self):
7452
    """Run the allocator based on input opcode.
7453

7454
    """
7455
    ial = IAllocator(self.cfg, self.rpc,
7456
                     mode=constants.IALLOCATOR_MODE_RELOC,
7457
                     name=self.instance_name,
7458
                     # TODO See why hail breaks with a single node below
7459
                     relocate_from=[self.instance.primary_node,
7460
                                    self.instance.primary_node],
7461
                     )
7462

    
7463
    ial.Run(self.lu.op.iallocator)
7464

    
7465
    if not ial.success:
7466
      raise errors.OpPrereqError("Can't compute nodes using"
7467
                                 " iallocator '%s': %s" %
7468
                                 (self.lu.op.iallocator, ial.info),
7469
                                 errors.ECODE_NORES)
7470
    if len(ial.result) != ial.required_nodes:
7471
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7472
                                 " of nodes (%s), required %s" %
7473
                                 (self.lu.op.iallocator, len(ial.result),
7474
                                  ial.required_nodes), errors.ECODE_FAULT)
7475
    self.target_node = ial.result[0]
7476
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7477
                 self.instance_name, self.lu.op.iallocator,
7478
                 utils.CommaJoin(ial.result))
7479

    
7480
  def _WaitUntilSync(self):
7481
    """Poll with custom rpc for disk sync.
7482

7483
    This uses our own step-based rpc call.
7484

7485
    """
7486
    self.feedback_fn("* wait until resync is done")
7487
    all_done = False
7488
    while not all_done:
7489
      all_done = True
7490
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7491
                                            self.nodes_ip,
7492
                                            self.instance.disks)
7493
      min_percent = 100
7494
      for node, nres in result.items():
7495
        nres.Raise("Cannot resync disks on node %s" % node)
7496
        node_done, node_percent = nres.payload
7497
        all_done = all_done and node_done
7498
        if node_percent is not None:
7499
          min_percent = min(min_percent, node_percent)
7500
      if not all_done:
7501
        if min_percent < 100:
7502
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7503
        time.sleep(2)
7504

    
7505
  def _EnsureSecondary(self, node):
7506
    """Demote a node to secondary.
7507

7508
    """
7509
    self.feedback_fn("* switching node %s to secondary mode" % node)
7510

    
7511
    for dev in self.instance.disks:
7512
      self.cfg.SetDiskID(dev, node)
7513

    
7514
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7515
                                          self.instance.disks)
7516
    result.Raise("Cannot change disk to secondary on node %s" % node)
7517

    
7518
  def _GoStandalone(self):
7519
    """Disconnect from the network.
7520

7521
    """
7522
    self.feedback_fn("* changing into standalone mode")
7523
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7524
                                               self.instance.disks)
7525
    for node, nres in result.items():
7526
      nres.Raise("Cannot disconnect disks node %s" % node)
7527

    
7528
  def _GoReconnect(self, multimaster):
7529
    """Reconnect to the network.
7530

7531
    """
7532
    if multimaster:
7533
      msg = "dual-master"
7534
    else:
7535
      msg = "single-master"
7536
    self.feedback_fn("* changing disks into %s mode" % msg)
7537
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7538
                                           self.instance.disks,
7539
                                           self.instance.name, multimaster)
7540
    for node, nres in result.items():
7541
      nres.Raise("Cannot change disks config on node %s" % node)
7542

    
7543
  def _ExecCleanup(self):
7544
    """Try to cleanup after a failed migration.
7545

7546
    The cleanup is done by:
7547
      - check that the instance is running only on one node
7548
        (and update the config if needed)
7549
      - change disks on its secondary node to secondary
7550
      - wait until disks are fully synchronized
7551
      - disconnect from the network
7552
      - change disks into single-master mode
7553
      - wait again until disks are fully synchronized
7554

7555
    """
7556
    instance = self.instance
7557
    target_node = self.target_node
7558
    source_node = self.source_node
7559

    
7560
    # check running on only one node
7561
    self.feedback_fn("* checking where the instance actually runs"
7562
                     " (if this hangs, the hypervisor might be in"
7563
                     " a bad state)")
7564
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7565
    for node, result in ins_l.items():
7566
      result.Raise("Can't contact node %s" % node)
7567

    
7568
    runningon_source = instance.name in ins_l[source_node].payload
7569
    runningon_target = instance.name in ins_l[target_node].payload
7570

    
7571
    if runningon_source and runningon_target:
7572
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7573
                               " or the hypervisor is confused; you will have"
7574
                               " to ensure manually that it runs only on one"
7575
                               " and restart this operation")
7576

    
7577
    if not (runningon_source or runningon_target):
7578
      raise errors.OpExecError("Instance does not seem to be running at all;"
7579
                               " in this case it's safer to repair by"
7580
                               " running 'gnt-instance stop' to ensure disk"
7581
                               " shutdown, and then restarting it")
7582

    
7583
    if runningon_target:
7584
      # the migration has actually succeeded, we need to update the config
7585
      self.feedback_fn("* instance running on secondary node (%s),"
7586
                       " updating config" % target_node)
7587
      instance.primary_node = target_node
7588
      self.cfg.Update(instance, self.feedback_fn)
7589
      demoted_node = source_node
7590
    else:
7591
      self.feedback_fn("* instance confirmed to be running on its"
7592
                       " primary node (%s)" % source_node)
7593
      demoted_node = target_node
7594

    
7595
    if instance.disk_template in constants.DTS_INT_MIRROR:
7596
      self._EnsureSecondary(demoted_node)
7597
      try:
7598
        self._WaitUntilSync()
7599
      except errors.OpExecError:
7600
        # we ignore here errors, since if the device is standalone, it
7601
        # won't be able to sync
7602
        pass
7603
      self._GoStandalone()
7604
      self._GoReconnect(False)
7605
      self._WaitUntilSync()
7606

    
7607
    self.feedback_fn("* done")
7608

    
7609
  def _RevertDiskStatus(self):
7610
    """Try to revert the disk status after a failed migration.
7611

7612
    """
7613
    target_node = self.target_node
7614
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7615
      return
7616

    
7617
    try:
7618
      self._EnsureSecondary(target_node)
7619
      self._GoStandalone()
7620
      self._GoReconnect(False)
7621
      self._WaitUntilSync()
7622
    except errors.OpExecError, err:
7623
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7624
                         " please try to recover the instance manually;"
7625
                         " error '%s'" % str(err))
7626

    
7627
  def _AbortMigration(self):
7628
    """Call the hypervisor code to abort a started migration.
7629

7630
    """
7631
    instance = self.instance
7632
    target_node = self.target_node
7633
    source_node = self.source_node
7634
    migration_info = self.migration_info
7635

    
7636
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7637
                                                                 instance,
7638
                                                                 migration_info,
7639
                                                                 False)
7640
    abort_msg = abort_result.fail_msg
7641
    if abort_msg:
7642
      logging.error("Aborting migration failed on target node %s: %s",
7643
                    target_node, abort_msg)
7644
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7646

    
7647
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7648
        instance, False, self.live)
7649
    abort_msg = abort_result.fail_msg
7650
    if abort_msg:
7651
      logging.error("Aborting migration failed on source node %s: %s",
7652
                    source_node, abort_msg)
7653

    
7654
  def _ExecMigration(self):
7655
    """Migrate an instance.
7656

7657
    The migrate is done by:
7658
      - change the disks into dual-master mode
7659
      - wait until disks are fully synchronized again
7660
      - migrate the instance
7661
      - change disks on the new secondary node (the old primary) to secondary
7662
      - wait until disks are fully synchronized
7663
      - change disks into single-master mode
7664

7665
    """
7666
    instance = self.instance
7667
    target_node = self.target_node
7668
    source_node = self.source_node
7669

    
7670
    # Check for hypervisor version mismatch and warn the user.
7671
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7672
                                       None, self.instance.hypervisor)
7673
    src_info = nodeinfo[source_node]
7674
    dst_info = nodeinfo[target_node]
7675

    
7676
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7677
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7678
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7679
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7680
      if src_version != dst_version:
7681
        self.feedback_fn("* warning: hypervisor version mismatch between"
7682
                         " source (%s) and target (%s) node" %
7683
                         (src_version, dst_version))
7684

    
7685
    self.feedback_fn("* checking disk consistency between source and target")
7686
    for dev in instance.disks:
7687
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7688
        raise errors.OpExecError("Disk %s is degraded or not fully"
7689
                                 " synchronized on target node,"
7690
                                 " aborting migration" % dev.iv_name)
7691

    
7692
    # First get the migration information from the remote node
7693
    result = self.rpc.call_migration_info(source_node, instance)
7694
    msg = result.fail_msg
7695
    if msg:
7696
      log_err = ("Failed fetching source migration information from %s: %s" %
7697
                 (source_node, msg))
7698
      logging.error(log_err)
7699
      raise errors.OpExecError(log_err)
7700

    
7701
    self.migration_info = migration_info = result.payload
7702

    
7703
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7704
      # Then switch the disks to master/master mode
7705
      self._EnsureSecondary(target_node)
7706
      self._GoStandalone()
7707
      self._GoReconnect(True)
7708
      self._WaitUntilSync()
7709

    
7710
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7711
    result = self.rpc.call_accept_instance(target_node,
7712
                                           instance,
7713
                                           migration_info,
7714
                                           self.nodes_ip[target_node])
7715

    
7716
    msg = result.fail_msg
7717
    if msg:
7718
      logging.error("Instance pre-migration failed, trying to revert"
7719
                    " disk status: %s", msg)
7720
      self.feedback_fn("Pre-migration failed, aborting")
7721
      self._AbortMigration()
7722
      self._RevertDiskStatus()
7723
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7724
                               (instance.name, msg))
7725

    
7726
    self.feedback_fn("* migrating instance to %s" % target_node)
7727
    result = self.rpc.call_instance_migrate(source_node, instance,
7728
                                            self.nodes_ip[target_node],
7729
                                            self.live)
7730
    msg = result.fail_msg
7731
    if msg:
7732
      logging.error("Instance migration failed, trying to revert"
7733
                    " disk status: %s", msg)
7734
      self.feedback_fn("Migration failed, aborting")
7735
      self._AbortMigration()
7736
      self._RevertDiskStatus()
7737
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7738
                               (instance.name, msg))
7739

    
7740
    self.feedback_fn("* starting memory transfer")
7741
    last_feedback = time.time()
7742
    while True:
7743
      result = self.rpc.call_instance_get_migration_status(source_node,
7744
                                                           instance)
7745
      msg = result.fail_msg
7746
      ms = result.payload   # MigrationStatus instance
7747
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7748
        logging.error("Instance migration failed, trying to revert"
7749
                      " disk status: %s", msg)
7750
        self.feedback_fn("Migration failed, aborting")
7751
        self._AbortMigration()
7752
        self._RevertDiskStatus()
7753
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7754
                                 (instance.name, msg))
7755

    
7756
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7757
        self.feedback_fn("* memory transfer complete")
7758
        break
7759

    
7760
      if (utils.TimeoutExpired(last_feedback,
7761
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7762
          ms.transferred_ram is not None):
7763
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7764
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7765
        last_feedback = time.time()
7766

    
7767
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7768

    
7769
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7770
                                                           instance,
7771
                                                           True,
7772
                                                           self.live)
7773
    msg = result.fail_msg
7774
    if msg:
7775
      logging.error("Instance migration succeeded, but finalization failed"
7776
                    " on the source node: %s", msg)
7777
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7778
                               msg)
7779

    
7780
    instance.primary_node = target_node
7781

    
7782
    # distribute new instance config to the other nodes
7783
    self.cfg.Update(instance, self.feedback_fn)
7784

    
7785
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7786
                                                           instance,
7787
                                                           migration_info,
7788
                                                           True)
7789
    msg = result.fail_msg
7790
    if msg:
7791
      logging.error("Instance migration succeeded, but finalization failed"
7792
                    " on the target node: %s", msg)
7793
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7794
                               msg)
7795

    
7796
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7797
      self._EnsureSecondary(source_node)
7798
      self._WaitUntilSync()
7799
      self._GoStandalone()
7800
      self._GoReconnect(False)
7801
      self._WaitUntilSync()
7802

    
7803
    self.feedback_fn("* done")
7804

    
7805
  def _ExecFailover(self):
7806
    """Failover an instance.
7807

7808
    The failover is done by shutting it down on its present node and
7809
    starting it on the secondary.
7810

7811
    """
7812
    instance = self.instance
7813
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7814

    
7815
    source_node = instance.primary_node
7816
    target_node = self.target_node
7817

    
7818
    if instance.admin_state == constants.ADMINST_UP:
7819
      self.feedback_fn("* checking disk consistency between source and target")
7820
      for dev in instance.disks:
7821
        # for drbd, these are drbd over lvm
7822
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7823
          if primary_node.offline:
7824
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7825
                             " target node %s" %
7826
                             (primary_node.name, dev.iv_name, target_node))
7827
          elif not self.ignore_consistency:
7828
            raise errors.OpExecError("Disk %s is degraded on target node,"
7829
                                     " aborting failover" % dev.iv_name)
7830
    else:
7831
      self.feedback_fn("* not checking disk consistency as instance is not"
7832
                       " running")
7833

    
7834
    self.feedback_fn("* shutting down instance on source node")
7835
    logging.info("Shutting down instance %s on node %s",
7836
                 instance.name, source_node)
7837

    
7838
    result = self.rpc.call_instance_shutdown(source_node, instance,
7839
                                             self.shutdown_timeout)
7840
    msg = result.fail_msg
7841
    if msg:
7842
      if self.ignore_consistency or primary_node.offline:
7843
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7844
                           " proceeding anyway; please make sure node"
7845
                           " %s is down; error details: %s",
7846
                           instance.name, source_node, source_node, msg)
7847
      else:
7848
        raise errors.OpExecError("Could not shutdown instance %s on"
7849
                                 " node %s: %s" %
7850
                                 (instance.name, source_node, msg))
7851

    
7852
    self.feedback_fn("* deactivating the instance's disks on source node")
7853
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7854
      raise errors.OpExecError("Can't shut down the instance's disks")
7855

    
7856
    instance.primary_node = target_node
7857
    # distribute new instance config to the other nodes
7858
    self.cfg.Update(instance, self.feedback_fn)
7859

    
7860
    # Only start the instance if it's marked as up
7861
    if instance.admin_state == constants.ADMINST_UP:
7862
      self.feedback_fn("* activating the instance's disks on target node %s" %
7863
                       target_node)
7864
      logging.info("Starting instance %s on node %s",
7865
                   instance.name, target_node)
7866

    
7867
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7868
                                           ignore_secondaries=True)
7869
      if not disks_ok:
7870
        _ShutdownInstanceDisks(self.lu, instance)
7871
        raise errors.OpExecError("Can't activate the instance's disks")
7872

    
7873
      self.feedback_fn("* starting the instance on the target node %s" %
7874
                       target_node)
7875
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7876
                                            False)
7877
      msg = result.fail_msg
7878
      if msg:
7879
        _ShutdownInstanceDisks(self.lu, instance)
7880
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7881
                                 (instance.name, target_node, msg))
7882

    
7883
  def Exec(self, feedback_fn):
7884
    """Perform the migration.
7885

7886
    """
7887
    self.feedback_fn = feedback_fn
7888
    self.source_node = self.instance.primary_node
7889

    
7890
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7891
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7892
      self.target_node = self.instance.secondary_nodes[0]
7893
      # Otherwise self.target_node has been populated either
7894
      # directly, or through an iallocator.
7895

    
7896
    self.all_nodes = [self.source_node, self.target_node]
7897
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7898
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7899

    
7900
    if self.failover:
7901
      feedback_fn("Failover instance %s" % self.instance.name)
7902
      self._ExecFailover()
7903
    else:
7904
      feedback_fn("Migrating instance %s" % self.instance.name)
7905

    
7906
      if self.cleanup:
7907
        return self._ExecCleanup()
7908
      else:
7909
        return self._ExecMigration()
7910

    
7911

    
7912
def _CreateBlockDev(lu, node, instance, device, force_create,
7913
                    info, force_open):
7914
  """Create a tree of block devices on a given node.
7915

7916
  If this device type has to be created on secondaries, create it and
7917
  all its children.
7918

7919
  If not, just recurse to children keeping the same 'force' value.
7920

7921
  @param lu: the lu on whose behalf we execute
7922
  @param node: the node on which to create the device
7923
  @type instance: L{objects.Instance}
7924
  @param instance: the instance which owns the device
7925
  @type device: L{objects.Disk}
7926
  @param device: the device to create
7927
  @type force_create: boolean
7928
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() returns True
7931
  @param info: the extra 'metadata' we should attach to the device
7932
      (this will be represented as a LVM tag)
7933
  @type force_open: boolean
7934
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7938

7939
  """
7940
  if device.CreateOnSecondary():
7941
    force_create = True
7942

    
7943
  if device.children:
7944
    for child in device.children:
7945
      _CreateBlockDev(lu, node, instance, child, force_create,
7946
                      info, force_open)
7947

    
7948
  if not force_create:
7949
    return
7950

    
7951
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7952

    
7953

    
7954
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7955
  """Create a single block device on a given node.
7956

7957
  This will not recurse over children of the device, so they must be
7958
  created in advance.
7959

7960
  @param lu: the lu on whose behalf we execute
7961
  @param node: the node on which to create the device
7962
  @type instance: L{objects.Instance}
7963
  @param instance: the instance which owns the device
7964
  @type device: L{objects.Disk}
7965
  @param device: the device to create
7966
  @param info: the extra 'metadata' we should attach to the device
7967
      (this will be represented as a LVM tag)
7968
  @type force_open: boolean
7969
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7973

7974
  """
7975
  lu.cfg.SetDiskID(device, node)
7976
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7977
                                       instance.name, force_open, info)
7978
  result.Raise("Can't create block device %s on"
7979
               " node %s for instance %s" % (device, node, instance.name))
7980
  if device.physical_id is None:
7981
    device.physical_id = result.payload
7982

    
7983

    
7984
def _GenerateUniqueNames(lu, exts):
7985
  """Generate a suitable LV name.
7986

7987
  This will generate a logical volume name for the given instance.
7988

7989
  """
7990
  results = []
7991
  for val in exts:
7992
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7993
    results.append("%s%s" % (new_id, val))
7994
  return results
7995

    
7996

    
7997
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7998
                         iv_name, p_minor, s_minor):
7999
  """Generate a drbd8 device complete with its children.
8000

8001
  """
8002
  assert len(vgnames) == len(names) == 2
8003
  port = lu.cfg.AllocatePort()
8004
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8005
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8006
                          logical_id=(vgnames[0], names[0]))
8007
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8008
                          logical_id=(vgnames[1], names[1]))
8009
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8010
                          logical_id=(primary, secondary, port,
8011
                                      p_minor, s_minor,
8012
                                      shared_secret),
8013
                          children=[dev_data, dev_meta],
8014
                          iv_name=iv_name)
8015
  return drbd_dev
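# Illustrative example (not part of the original code): for a 10240 MiB DRBD
# disk, the device returned above is one LD_DRBD8 disk whose logical_id holds
# (primary, secondary, port, p_minor, s_minor, shared_secret) and whose two
# LD_LV children are the 10240 MiB "_data" volume and the 128 MiB (DRBD
# metadata) "_meta" volume, named as built by _GenerateDiskTemplate below.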
8016

    
8017

    
8018
def _GenerateDiskTemplate(lu, template_name,
8019
                          instance_name, primary_node,
8020
                          secondary_nodes, disk_info,
8021
                          file_storage_dir, file_driver,
8022
                          base_index, feedback_fn):
8023
  """Generate the entire disk layout for a given template type.
8024

8025
  """
8026
  #TODO: compute space requirements
8027

    
8028
  vgname = lu.cfg.GetVGName()
8029
  disk_count = len(disk_info)
8030
  disks = []
8031
  if template_name == constants.DT_DISKLESS:
8032
    pass
8033
  elif template_name == constants.DT_PLAIN:
8034
    if len(secondary_nodes) != 0:
8035
      raise errors.ProgrammerError("Wrong template configuration")
8036

    
8037
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8038
                                      for i in range(disk_count)])
8039
    for idx, disk in enumerate(disk_info):
8040
      disk_index = idx + base_index
8041
      vg = disk.get(constants.IDISK_VG, vgname)
8042
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8043
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
8044
                              size=disk[constants.IDISK_SIZE],
8045
                              logical_id=(vg, names[idx]),
8046
                              iv_name="disk/%d" % disk_index,
8047
                              mode=disk[constants.IDISK_MODE])
8048
      disks.append(disk_dev)
8049
  elif template_name == constants.DT_DRBD8:
8050
    if len(secondary_nodes) != 1:
8051
      raise errors.ProgrammerError("Wrong template configuration")
8052
    remote_node = secondary_nodes[0]
8053
    minors = lu.cfg.AllocateDRBDMinor(
8054
      [primary_node, remote_node] * len(disk_info), instance_name)
8055

    
8056
    names = []
8057
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8058
                                               for i in range(disk_count)]):
8059
      names.append(lv_prefix + "_data")
8060
      names.append(lv_prefix + "_meta")
8061
    for idx, disk in enumerate(disk_info):
8062
      disk_index = idx + base_index
8063
      data_vg = disk.get(constants.IDISK_VG, vgname)
8064
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8065
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8066
                                      disk[constants.IDISK_SIZE],
8067
                                      [data_vg, meta_vg],
8068
                                      names[idx * 2:idx * 2 + 2],
8069
                                      "disk/%d" % disk_index,
8070
                                      minors[idx * 2], minors[idx * 2 + 1])
8071
      disk_dev.mode = disk[constants.IDISK_MODE]
8072
      disks.append(disk_dev)
8073
  elif template_name == constants.DT_FILE:
8074
    if len(secondary_nodes) != 0:
8075
      raise errors.ProgrammerError("Wrong template configuration")
8076

    
8077
    opcodes.RequireFileStorage()
8078

    
8079
    for idx, disk in enumerate(disk_info):
8080
      disk_index = idx + base_index
8081
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8082
                              size=disk[constants.IDISK_SIZE],
8083
                              iv_name="disk/%d" % disk_index,
8084
                              logical_id=(file_driver,
8085
                                          "%s/disk%d" % (file_storage_dir,
8086
                                                         disk_index)),
8087
                              mode=disk[constants.IDISK_MODE])
8088
      disks.append(disk_dev)
8089
  elif template_name == constants.DT_SHARED_FILE:
8090
    if len(secondary_nodes) != 0:
8091
      raise errors.ProgrammerError("Wrong template configuration")
8092

    
8093
    opcodes.RequireSharedFileStorage()
8094

    
8095
    for idx, disk in enumerate(disk_info):
8096
      disk_index = idx + base_index
8097
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8098
                              size=disk[constants.IDISK_SIZE],
8099
                              iv_name="disk/%d" % disk_index,
8100
                              logical_id=(file_driver,
8101
                                          "%s/disk%d" % (file_storage_dir,
8102
                                                         disk_index)),
8103
                              mode=disk[constants.IDISK_MODE])
8104
      disks.append(disk_dev)
8105
  elif template_name == constants.DT_BLOCK:
8106
    if len(secondary_nodes) != 0:
8107
      raise errors.ProgrammerError("Wrong template configuration")
8108

    
8109
    for idx, disk in enumerate(disk_info):
8110
      disk_index = idx + base_index
8111
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8112
                              size=disk[constants.IDISK_SIZE],
8113
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8114
                                          disk[constants.IDISK_ADOPT]),
8115
                              iv_name="disk/%d" % disk_index,
8116
                              mode=disk[constants.IDISK_MODE])
8117
      disks.append(disk_dev)
8118

    
8119
  else:
8120
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8121
  return disks
8122

    
8123

    
8124
def _GetInstanceInfoText(instance):
8125
  """Compute that text that should be added to the disk's metadata.
8126

8127
  """
8128
  return "originstname+%s" % instance.name
8129

    
8130

    
8131
def _CalcEta(time_taken, written, total_size):
8132
  """Calculates the ETA based on size written and total size.
8133

8134
  @param time_taken: The time taken so far
8135
  @param written: amount written so far
8136
  @param total_size: The total size of data to be written
8137
  @return: The remaining time in seconds
8138

8139
  """
8140
  avg_time = time_taken / float(written)
8141
  return (total_size - written) * avg_time
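# Worked example (illustrative only): if 100 MiB out of 1000 MiB were written
# in 20 seconds, then avg_time = 20 / 100.0 = 0.2 s/MiB and the ETA is
# (1000 - 100) * 0.2 = 180.0 seconds, i.e. about three more minutes.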
8142

    
8143

    
8144
def _WipeDisks(lu, instance):
8145
  """Wipes instance disks.
8146

8147
  @type lu: L{LogicalUnit}
8148
  @param lu: the logical unit on whose behalf we execute
8149
  @type instance: L{objects.Instance}
8150
  @param instance: the instance whose disks we should create
8151
  @return: the success of the wipe
8152

8153
  """
8154
  node = instance.primary_node
8155

    
8156
  for device in instance.disks:
8157
    lu.cfg.SetDiskID(device, node)
8158

    
8159
  logging.info("Pause sync of instance %s disks", instance.name)
8160
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8161

    
8162
  for idx, success in enumerate(result.payload):
8163
    if not success:
8164
      logging.warn("pause-sync of instance %s for disks %d failed",
8165
                   instance.name, idx)
8166

    
8167
  try:
8168
    for idx, device in enumerate(instance.disks):
8169
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8170
      # MAX_WIPE_CHUNK at max
8171
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8172
                            constants.MIN_WIPE_CHUNK_PERCENT)
8173
      # we _must_ make this an int, otherwise rounding errors will
8174
      # occur
8175
      wipe_chunk_size = int(wipe_chunk_size)
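      # Worked example (illustrative, assuming MIN_WIPE_CHUNK_PERCENT is 10
      # and MAX_WIPE_CHUNK is 1024 MiB): a 5000 MiB disk is wiped in
      # min(1024, 500) = 500 MiB chunks, while a 20480 MiB disk is capped at
      # 1024 MiB chunks.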
8176

    
8177
      lu.LogInfo("* Wiping disk %d", idx)
8178
      logging.info("Wiping disk %d for instance %s, node %s using"
8179
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8180

    
8181
      offset = 0
8182
      size = device.size
8183
      last_output = 0
8184
      start_time = time.time()
8185

    
8186
      while offset < size:
8187
        wipe_size = min(wipe_chunk_size, size - offset)
8188
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8189
                      idx, offset, wipe_size)
8190
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8191
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8192
                     (idx, offset, wipe_size))
8193
        now = time.time()
8194
        offset += wipe_size
8195
        if now - last_output >= 60:
8196
          eta = _CalcEta(now - start_time, offset, size)
8197
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8198
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8199
          last_output = now
8200
  finally:
8201
    logging.info("Resume sync of instance %s disks", instance.name)
8202

    
8203
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8204

    
8205
    for idx, success in enumerate(result.payload):
8206
      if not success:
8207
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8208
                      " look at the status and troubleshoot the issue", idx)
8209
        logging.warn("resume-sync of instance %s for disks %d failed",
8210
                     instance.name, idx)
8211


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
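# Worked example for _ComputeDiskSizePerVG (illustrative values): two DRBD
# disks of 10240 MB and 5120 MB, both in volume group "xenvg", yield
#   _ComputeDiskSizePerVG(constants.DT_DRBD8,
#                         [{constants.IDISK_SIZE: 10240,
#                           constants.IDISK_VG: "xenvg"},
#                          {constants.IDISK_SIZE: 5120,
#                           constants.IDISK_VG: "xenvg"}])
#   => {"xenvg": 10240 + 5120 + 2 * DRBD_META_SIZE}  (i.e. {"xenvg": 15616})
# while the file-based templates need no space in any volume group.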


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
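# For the same two example disks, _ComputeDiskSize returns a single total
# instead of a per-VG dict: 10240 + 5120 = 15360 for DT_PLAIN, and
# 15360 + 2 * DRBD_META_SIZE = 15616 for DT_DRBD8; templates that do not use
# the volume group return None or 0.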


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
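# The dict sent for validation is built by overlaying the per-instance
# overrides on top of the cluster-level defaults for that hypervisor, e.g.
# (illustrative values): with cluster.hvparams[hvname] containing
# {"kernel_path": "/boot/vmlinuz-default"} and hvparams containing
# {"kernel_path": "/boot/vmlinuz-custom"}, hvfull ends up with the custom
# kernel path and every other key taken from the cluster defaults.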


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt
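    # Disk specifications must be homogeneous: either every disk requests
    # adoption or none does.  For example (illustrative values),
    #   disks=[{constants.IDISK_SIZE: 10240}, {constants.IDISK_SIZE: 5120}]
    # creates new volumes, while
    #   disks=[{constants.IDISK_SIZE: 10240,
    #           constants.IDISK_ADOPT: "existing-lv"}]
    # adopts an existing one; mixing both forms is rejected above.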

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.disk_template == constants.DT_FILE:
      opcodes.RequireFileStorage()
    elif self.op.disk_template == constants.DT_SHARED_FILE:
      opcodes.RequireSharedFileStorage()

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    instance_name = self.op.instance_name
    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    if self.op.iallocator:
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
      # specifying a group on instance creation and then selecting nodes from
      # that group
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist
      # Lock resources of instance's primary and secondary nodes (copy to
      # prevent accidental modification)
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from a path"
                                     " requires a source node option",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=self.op.tags,
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     memory=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
        if self.op.disk_template not in constants.DISK_TEMPLATES:
          raise errors.OpPrereqError("Disk template specified in configuration"
                                     " file is not one of the allowed values:"
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      disks = []
      # TODO: import the disk iv_name too
      for idx in range(constants.MAX_DISKS):
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
      self.op.disks = disks
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if not self.op.nics:
      nics = []
      for idx in range(constants.MAX_NICS):
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
          ndict = {}
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
            ndict[name] = v
          nics.append(ndict)
        else:
          break
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value
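  # The export information parsed above is an INI-style file: the options
  # read from the INISECT_INS section look like (illustrative excerpt)
  #   disk0_size = 10240
  #   nic0_mac = aa:00:00:fa:3a:3f
  #   hypervisor = xen-pvm
  #   tags = production web
  # and the optional INISECT_HYP, INISECT_BEP and INISECT_OSP sections are
  # only consulted for parameters the opcode did not specify itself.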

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
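  # With identify_defaults set, _RevertToDefaults drops every parameter whose
  # value merely repeats the current cluster default, so the instance keeps
  # tracking the cluster-wide setting.  E.g. (illustrative values) if the
  # cluster default for the "memory" beparam is 128 and the import specifies
  # memory=128, the entry is removed from self.op.beparams, whereas
  # memory=256 is kept as an explicit per-instance override.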

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
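  # The resulting path is <cluster file storage dir>[/<opcode
  # file_storage_dir>]/<instance name>, e.g. (illustrative paths)
  # /srv/ganeti/file-storage/webfarm/inst1.example.com when
  # file_storage_dir="webfarm" was given, or
  # /srv/ganeti/file-storage/inst1.example.com when it was omitted.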

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self._CalculateFileStorageDir()

    if self.op.mode == constants.INSTANCE_IMPORT:
      export_info = self._ReadExportInfo()
      self._ReadExportParams(export_info)

    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if (self.op.hypervisor is None or
        self.op.hypervisor == constants.VALUE_AUTO):
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # Check tag validity
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
                                      self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
    for param, value in self.op.beparams.iteritems():
      if value == constants.VALUE_AUTO:
        self.op.beparams[param] = default_beparams[param]
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = cluster.SimpleFillBE(self.op.beparams)

    # build os parameters
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)

    # now that hvp/bep are in final format, let's reset to defaults,
    # if told to do so
    if self.op.identify_defaults:
      self._RevertToDefaults(cluster)

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get(constants.INIC_MODE, None)
      nic_mode = nic_mode_req
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get(constants.INIC_IP, default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped",
                                     errors.ECODE_INVAL)
        nic_ip = self.hostname1.ip
      else:
        if not netutils.IPAddress.IsValid(ip):
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      #  Build nic parameters
      link = nic.get(constants.INIC_LINK, None)
      if link == constants.VALUE_AUTO:
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = cluster.SimpleFillNIC(nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    default_vg = self.cfg.GetVGName()
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get(constants.IDISK_SIZE, None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)

      data_vg = disk.get(constants.IDISK_VG, default_vg)
      new_disk = {
        constants.IDISK_SIZE: size,
        constants.IDISK_MODE: mode,
        constants.IDISK_VG: data_vg,
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
        }
      if constants.IDISK_ADOPT in disk:
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
      self.disks.append(new_disk)

    if self.op.mode == constants.INSTANCE_IMPORT:
      disk_images = []
      for idx in range(len(self.disks)):
        option = "disk%d_dump" % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(self.op.src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, "name")
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO:
            nic_mac_ini = "nic%d_mac" % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    # Release all unneeded node locks
    _ReleaseLocks(self, locking.LEVEL_NODE,
                  keep=filter(None, [self.op.pnode, self.op.snode,
                                     self.op.src_node]))

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if not pnode.vm_capable:
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
                                 " '%s'" % pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_INT_MIRROR:
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      _CheckNodeVmCapable(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    if not self.adopt_disks:
      # Check lv size requirements, if not adopting
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)

    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
                                disk[constants.IDISK_ADOPT])
                     for disk in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
          # to ReserveLV uses the same syntax
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       vg_names.payload.keys())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload

      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
                                        dsk[constants.IDISK_ADOPT])][0]))

    elif self.op.disk_template == constants.DT_BLOCK:
      # Normalize and de-duplicate device paths
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
                       for disk in self.disks])
      if len(all_disks) != len(self.disks):
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
                                   errors.ECODE_INVAL)
      baddisks = [d for d in all_disks
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
      if baddisks:
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
                                   " cannot be adopted" %
                                   (", ".join(baddisks),
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
                                   errors.ECODE_INVAL)

      node_disks = self.rpc.call_bdev_sizes([pnode.name],
                                            list(all_disks))[pnode.name]
      node_disks.Raise("Cannot get block device information from node %s" %
                       pnode.name)
      node_disks = node_disks.payload
      delta = all_disks.difference(node_disks.keys())
      if delta:
        raise errors.OpPrereqError("Missing block device(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      for dsk in self.disks:
        dsk[constants.IDISK_SIZE] = \
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
    # check OS parameters (remotely)
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Node locks differ from node resource locks"

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  self.instance_file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_state=constants.ADMINST_DOWN,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.op.tags:
      for tag in self.op.tags:
        iobj.AddTag(tag)

    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adoped LVs")
9298
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # Release all node resource locks
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
                        not self.op.wait_for_sync)
          if pause_sync:
            feedback_fn("* pausing disk sync to install instance OS")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, True)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("pause-sync of instance %s for disk %d failed",
                             instance, idx)

          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          os_add_result = \
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
                                          self.op.debug_level)
          if pause_sync:
            feedback_fn("* resuming disk sync")
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
                                                              iobj.disks, False)
            for idx, success in enumerate(result.payload):
              if not success:
                logging.warn("resume-sync of instance %s for disk %d failed",
                             instance, idx)

          os_add_result.Raise("Could not add os for instance %s"
                              " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    assert not self.owned_locks(locking.LEVEL_NODE_RES)

    if self.op.start:
      iobj.admin_state = constants.ADMINST_UP
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
                                            False)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUInstanceConsole(NoHooksLU):
9465
  """Connect to an instance's console.
9466

9467
  This is somewhat special in that it returns the command line that
9468
  you need to run on the master node in order to connect to the
9469
  console.
9470

9471
  """
9472
  REQ_BGL = False
9473

    
9474
  def ExpandNames(self):
9475
    self.share_locks = _ShareAll()
9476
    self._ExpandAndLockInstance()
9477

    
9478
  def CheckPrereq(self):
9479
    """Check prerequisites.
9480

9481
    This checks that the instance is in the cluster.
9482

9483
    """
9484
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9485
    assert self.instance is not None, \
9486
      "Cannot retrieve locked instance %s" % self.op.instance_name
9487
    _CheckNodeOnline(self, self.instance.primary_node)
9488

    
9489
  def Exec(self, feedback_fn):
9490
    """Connect to the console of an instance
9491

9492
    """
9493
    instance = self.instance
9494
    node = instance.primary_node
9495

    
9496
    node_insts = self.rpc.call_instance_list([node],
9497
                                             [instance.hypervisor])[node]
9498
    node_insts.Raise("Can't get node information from %s" % node)
9499

    
9500
    if instance.name not in node_insts.payload:
9501
      if instance.admin_state == constants.ADMINST_UP:
9502
        state = constants.INSTST_ERRORDOWN
9503
      elif instance.admin_state == constants.ADMINST_DOWN:
9504
        state = constants.INSTST_ADMINDOWN
9505
      else:
9506
        state = constants.INSTST_ADMINOFFLINE
9507
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9508
                               (instance.name, state))
9509

    
9510
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9511

    
9512
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9513

    
9514

    
9515
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
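# Editorial usage sketch (not part of the original module): LUInstanceConsole
# above is the typical caller, roughly
#   console_dict = _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
# The returned dictionary is the serialized console object handed back to the
# client, which uses it to build the command line mentioned in the LU
# docstring.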


class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)
9547

    
9548
  def ExpandNames(self):
9549
    self._ExpandAndLockInstance()
9550

    
9551
    assert locking.LEVEL_NODE not in self.needed_locks
9552
    assert locking.LEVEL_NODE_RES not in self.needed_locks
9553
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9554

    
9555
    assert self.op.iallocator is None or self.op.remote_node is None, \
9556
      "Conflicting options"
9557

    
9558
    if self.op.remote_node is not None:
9559
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9560

    
9561
      # Warning: do not remove the locking of the new secondary here
9562
      # unless DRBD8.AddChildren is changed to work in parallel;
9563
      # currently it doesn't since parallel invocations of
9564
      # FindUnusedMinor will conflict
9565
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9566
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9567
    else:
9568
      self.needed_locks[locking.LEVEL_NODE] = []
9569
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9570

    
9571
      if self.op.iallocator is not None:
9572
        # iallocator will select a new node in the same group
9573
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9574

    
9575
    self.needed_locks[locking.LEVEL_NODE_RES] = []
9576

    
9577
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9578
                                   self.op.iallocator, self.op.remote_node,
9579
                                   self.op.disks, False, self.op.early_release)
9580

    
9581
    self.tasklets = [self.replacer]
9582

    
9583
  def DeclareLocks(self, level):
9584
    if level == locking.LEVEL_NODEGROUP:
9585
      assert self.op.remote_node is None
9586
      assert self.op.iallocator is not None
9587
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9588

    
9589
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9590
      # Lock all groups used by instance optimistically; this requires going
9591
      # via the node before it's locked, requiring verification later on
9592
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9593
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9594

    
9595
    elif level == locking.LEVEL_NODE:
9596
      if self.op.iallocator is not None:
9597
        assert self.op.remote_node is None
9598
        assert not self.needed_locks[locking.LEVEL_NODE]
9599

    
9600
        # Lock member nodes of all locked groups
9601
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9602
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9603
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9604
      else:
9605
        self._LockInstancesNodes()
9606
    elif level == locking.LEVEL_NODE_RES:
9607
      # Reuse node locks
9608
      self.needed_locks[locking.LEVEL_NODE_RES] = \
9609
        self.needed_locks[locking.LEVEL_NODE]
9610

    
9611
  def BuildHooksEnv(self):
9612
    """Build hooks env.
9613

9614
    This runs on the master, the primary and all the secondaries.
9615

9616
    """
9617
    instance = self.replacer.instance
9618
    env = {
9619
      "MODE": self.op.mode,
9620
      "NEW_SECONDARY": self.op.remote_node,
9621
      "OLD_SECONDARY": instance.secondary_nodes[0],
9622
      }
9623
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9624
    return env
9625

    
9626
  def BuildHooksNodes(self):
9627
    """Build hooks nodes.
9628

9629
    """
9630
    instance = self.replacer.instance
9631
    nl = [
9632
      self.cfg.GetMasterNode(),
9633
      instance.primary_node,
9634
      ]
9635
    if self.op.remote_node is not None:
9636
      nl.append(self.op.remote_node)
9637
    return nl, nl
9638

    
9639
  def CheckPrereq(self):
9640
    """Check prerequisites.
9641

9642
    """
9643
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9644
            self.op.iallocator is None)
9645

    
9646
    # Verify if node group locks are still correct
9647
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9648
    if owned_groups:
9649
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9650

    
9651
    return LogicalUnit.CheckPrereq(self)
9652

    
9653

    
9654
class TLReplaceDisks(Tasklet):
9655
  """Replaces disks for an instance.
9656

9657
  Note: Locking is not within the scope of this class.
9658

9659
  """
9660
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9661
               disks, delay_iallocator, early_release):
9662
    """Initializes this class.
9663

9664
    """
9665
    Tasklet.__init__(self, lu)
9666

    
9667
    # Parameters
9668
    self.instance_name = instance_name
9669
    self.mode = mode
9670
    self.iallocator_name = iallocator_name
9671
    self.remote_node = remote_node
9672
    self.disks = disks
9673
    self.delay_iallocator = delay_iallocator
9674
    self.early_release = early_release
9675

    
9676
    # Runtime data
9677
    self.instance = None
9678
    self.new_node = None
9679
    self.target_node = None
9680
    self.other_node = None
9681
    self.remote_node_info = None
9682
    self.node_secondary_ip = None
9683

    
9684
  @staticmethod
9685
  def CheckArguments(mode, remote_node, iallocator):
9686
    """Helper function for users of this class.
9687

9688
    """
9689
    # check for valid parameter combination
9690
    if mode == constants.REPLACE_DISK_CHG:
9691
      if remote_node is None and iallocator is None:
9692
        raise errors.OpPrereqError("When changing the secondary either an"
9693
                                   " iallocator script must be used or the"
9694
                                   " new node given", errors.ECODE_INVAL)
9695

    
9696
      if remote_node is not None and iallocator is not None:
9697
        raise errors.OpPrereqError("Give either the iallocator or the new"
9698
                                   " secondary, not both", errors.ECODE_INVAL)
9699

    
9700
    elif remote_node is not None or iallocator is not None:
9701
      # Not replacing the secondary
9702
      raise errors.OpPrereqError("The iallocator and new node options can"
9703
                                 " only be used when changing the"
9704
                                 " secondary node", errors.ECODE_INVAL)
9705

    
9706
  @staticmethod
9707
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9708
    """Compute a new secondary node using an IAllocator.
9709

9710
    """
9711
    ial = IAllocator(lu.cfg, lu.rpc,
9712
                     mode=constants.IALLOCATOR_MODE_RELOC,
9713
                     name=instance_name,
9714
                     relocate_from=list(relocate_from))
9715

    
9716
    ial.Run(iallocator_name)
9717

    
9718
    if not ial.success:
9719
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9720
                                 " %s" % (iallocator_name, ial.info),
9721
                                 errors.ECODE_NORES)
9722

    
9723
    if len(ial.result) != ial.required_nodes:
9724
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9725
                                 " of nodes (%s), required %s" %
9726
                                 (iallocator_name,
9727
                                  len(ial.result), ial.required_nodes),
9728
                                 errors.ECODE_FAULT)
9729

    
9730
    remote_node_name = ial.result[0]
9731

    
9732
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9733
               instance_name, remote_node_name)
9734

    
9735
    return remote_node_name
9736

    
9737
  def _FindFaultyDisks(self, node_name):
9738
    """Wrapper for L{_FindFaultyInstanceDisks}.
9739

9740
    """
9741
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9742
                                    node_name, True)
9743

    
9744
  def _CheckDisksActivated(self, instance):
9745
    """Checks if the instance disks are activated.
9746

9747
    @param instance: The instance to check disks
9748
    @return: True if they are activated, False otherwise
9749

9750
    """
9751
    nodes = instance.all_nodes
9752

    
9753
    for idx, dev in enumerate(instance.disks):
9754
      for node in nodes:
9755
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9756
        self.cfg.SetDiskID(dev, node)
9757

    
9758
        result = self.rpc.call_blockdev_find(node, dev)
9759

    
9760
        if result.offline:
9761
          continue
9762
        elif result.fail_msg or not result.payload:
9763
          return False
9764

    
9765
    return True
9766

    
9767
  def CheckPrereq(self):
9768
    """Check prerequisites.
9769

9770
    This checks that the instance is in the cluster.
9771

9772
    """
9773
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9774
    assert instance is not None, \
9775
      "Cannot retrieve locked instance %s" % self.instance_name
9776

    
9777
    if instance.disk_template != constants.DT_DRBD8:
9778
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9779
                                 " instances", errors.ECODE_INVAL)
9780

    
9781
    if len(instance.secondary_nodes) != 1:
9782
      raise errors.OpPrereqError("The instance has a strange layout,"
9783
                                 " expected one secondary but found %d" %
9784
                                 len(instance.secondary_nodes),
9785
                                 errors.ECODE_FAULT)
9786

    
9787
    if not self.delay_iallocator:
9788
      self._CheckPrereq2()
9789

    
9790
  def _CheckPrereq2(self):
9791
    """Check prerequisites, second part.
9792

9793
    This function should always be part of CheckPrereq. It was separated and
    is now called from Exec because, during node evacuation, the iallocator
    was only called with an unmodified cluster model, not taking planned
    changes into account.
9797

9798
    """
9799
    instance = self.instance
9800
    secondary_node = instance.secondary_nodes[0]
9801

    
9802
    if self.iallocator_name is None:
9803
      remote_node = self.remote_node
9804
    else:
9805
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9806
                                       instance.name, instance.secondary_nodes)
9807

    
9808
    if remote_node is None:
9809
      self.remote_node_info = None
9810
    else:
9811
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9812
             "Remote node '%s' is not locked" % remote_node
9813

    
9814
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9815
      assert self.remote_node_info is not None, \
9816
        "Cannot retrieve locked node %s" % remote_node
9817

    
9818
    if remote_node == self.instance.primary_node:
9819
      raise errors.OpPrereqError("The specified node is the primary node of"
9820
                                 " the instance", errors.ECODE_INVAL)
9821

    
9822
    if remote_node == secondary_node:
9823
      raise errors.OpPrereqError("The specified node is already the"
9824
                                 " secondary node of the instance",
9825
                                 errors.ECODE_INVAL)
9826

    
9827
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9828
                                    constants.REPLACE_DISK_CHG):
9829
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9830
                                 errors.ECODE_INVAL)
9831

    
9832
    if self.mode == constants.REPLACE_DISK_AUTO:
9833
      if not self._CheckDisksActivated(instance):
9834
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9835
                                   " first" % self.instance_name,
9836
                                   errors.ECODE_STATE)
9837
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9838
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9839

    
9840
      if faulty_primary and faulty_secondary:
9841
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9842
                                   " one node and can not be repaired"
9843
                                   " automatically" % self.instance_name,
9844
                                   errors.ECODE_STATE)
9845

    
9846
      if faulty_primary:
9847
        self.disks = faulty_primary
9848
        self.target_node = instance.primary_node
9849
        self.other_node = secondary_node
9850
        check_nodes = [self.target_node, self.other_node]
9851
      elif faulty_secondary:
9852
        self.disks = faulty_secondary
9853
        self.target_node = secondary_node
9854
        self.other_node = instance.primary_node
9855
        check_nodes = [self.target_node, self.other_node]
9856
      else:
9857
        self.disks = []
9858
        check_nodes = []
9859

    
9860
    else:
9861
      # Non-automatic modes
9862
      if self.mode == constants.REPLACE_DISK_PRI:
9863
        self.target_node = instance.primary_node
9864
        self.other_node = secondary_node
9865
        check_nodes = [self.target_node, self.other_node]
9866

    
9867
      elif self.mode == constants.REPLACE_DISK_SEC:
9868
        self.target_node = secondary_node
9869
        self.other_node = instance.primary_node
9870
        check_nodes = [self.target_node, self.other_node]
9871

    
9872
      elif self.mode == constants.REPLACE_DISK_CHG:
9873
        self.new_node = remote_node
9874
        self.other_node = instance.primary_node
9875
        self.target_node = secondary_node
9876
        check_nodes = [self.new_node, self.other_node]
9877

    
9878
        _CheckNodeNotDrained(self.lu, remote_node)
9879
        _CheckNodeVmCapable(self.lu, remote_node)
9880

    
9881
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9882
        assert old_node_info is not None
9883
        if old_node_info.offline and not self.early_release:
9884
          # doesn't make sense to delay the release
9885
          self.early_release = True
9886
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9887
                          " early-release mode", secondary_node)
9888

    
9889
      else:
9890
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9891
                                     self.mode)
9892

    
9893
      # If not specified all disks should be replaced
9894
      if not self.disks:
9895
        self.disks = range(len(self.instance.disks))
9896

    
9897
    for node in check_nodes:
9898
      _CheckNodeOnline(self.lu, node)
9899

    
9900
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9901
                                                          self.other_node,
9902
                                                          self.target_node]
9903
                              if node_name is not None)
9904

    
9905
    # Release unneeded node and node resource locks
9906
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9907
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
9908

    
9909
    # Release any owned node group
9910
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9911
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9912

    
9913
    # Check whether disks are valid
9914
    for disk_idx in self.disks:
9915
      instance.FindDisk(disk_idx)
9916

    
9917
    # Get secondary node IP addresses
9918
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9919
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9920

    
9921
  def Exec(self, feedback_fn):
9922
    """Execute disk replacement.
9923

9924
    This dispatches the disk replacement to the appropriate handler.
9925

9926
    """
9927
    if self.delay_iallocator:
9928
      self._CheckPrereq2()
9929

    
9930
    if __debug__:
9931
      # Verify owned locks before starting operation
9932
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9933
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9934
          ("Incorrect node locks, owning %s, expected %s" %
9935
           (owned_nodes, self.node_secondary_ip.keys()))
9936
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
9937
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
9938

    
9939
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9940
      assert list(owned_instances) == [self.instance_name], \
9941
          "Instance '%s' not locked" % self.instance_name
9942

    
9943
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9944
          "Should not own any node group lock at this point"
9945

    
9946
    if not self.disks:
9947
      feedback_fn("No disks need replacement")
9948
      return
9949

    
9950
    feedback_fn("Replacing disk(s) %s for %s" %
9951
                (utils.CommaJoin(self.disks), self.instance.name))
9952

    
9953
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
9954

    
9955
    # Activate the instance disks if we're replacing them on a down instance
9956
    if activate_disks:
9957
      _StartInstanceDisks(self.lu, self.instance, True)
9958

    
9959
    try:
9960
      # Should we replace the secondary node?
9961
      if self.new_node is not None:
9962
        fn = self._ExecDrbd8Secondary
9963
      else:
9964
        fn = self._ExecDrbd8DiskOnly
9965

    
9966
      result = fn(feedback_fn)
9967
    finally:
9968
      # Deactivate the instance disks if we're replacing them on a
9969
      # down instance
9970
      if activate_disks:
9971
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9972

    
9973
    assert not self.lu.owned_locks(locking.LEVEL_NODE)
9974

    
9975
    if __debug__:
9976
      # Verify owned locks
9977
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
9978
      nodes = frozenset(self.node_secondary_ip)
9979
      assert ((self.early_release and not owned_nodes) or
9980
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9981
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9982
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9983

    
9984
    return result
9985

    
9986
  def _CheckVolumeGroup(self, nodes):
9987
    self.lu.LogInfo("Checking volume groups")
9988

    
9989
    vgname = self.cfg.GetVGName()
9990

    
9991
    # Make sure volume group exists on all involved nodes
9992
    results = self.rpc.call_vg_list(nodes)
9993
    if not results:
9994
      raise errors.OpExecError("Can't list volume groups on the nodes")
9995

    
9996
    for node in nodes:
9997
      res = results[node]
9998
      res.Raise("Error checking node %s" % node)
9999
      if vgname not in res.payload:
10000
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
10001
                                 (vgname, node))
10002

    
10003
  def _CheckDisksExistence(self, nodes):
10004
    # Check disk existence
10005
    for idx, dev in enumerate(self.instance.disks):
10006
      if idx not in self.disks:
10007
        continue
10008

    
10009
      for node in nodes:
10010
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10011
        self.cfg.SetDiskID(dev, node)
10012

    
10013
        result = self.rpc.call_blockdev_find(node, dev)
10014

    
10015
        msg = result.fail_msg
10016
        if msg or not result.payload:
10017
          if not msg:
10018
            msg = "disk not found"
10019
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10020
                                   (idx, node, msg))
10021

    
10022
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10023
    for idx, dev in enumerate(self.instance.disks):
10024
      if idx not in self.disks:
10025
        continue
10026

    
10027
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10028
                      (idx, node_name))
10029

    
10030
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10031
                                   ldisk=ldisk):
10032
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10033
                                 " replace disks for instance %s" %
10034
                                 (node_name, self.instance.name))
10035

    
10036
  def _CreateNewStorage(self, node_name):
10037
    """Create new storage on the primary or secondary node.
10038

10039
    This is only used for same-node replaces, not for changing the
10040
    secondary node, hence we don't want to modify the existing disk.
10041

10042
    """
10043
    iv_names = {}
10044

    
10045
    for idx, dev in enumerate(self.instance.disks):
10046
      if idx not in self.disks:
10047
        continue
10048

    
10049
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10050

    
10051
      self.cfg.SetDiskID(dev, node_name)
10052

    
10053
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10054
      names = _GenerateUniqueNames(self.lu, lv_names)
10055

    
10056
      vg_data = dev.children[0].logical_id[0]
10057
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10058
                             logical_id=(vg_data, names[0]))
10059
      vg_meta = dev.children[1].logical_id[0]
10060
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10061
                             logical_id=(vg_meta, names[1]))
10062

    
10063
      new_lvs = [lv_data, lv_meta]
10064
      old_lvs = [child.Copy() for child in dev.children]
10065
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10066

    
10067
      # we pass force_create=True to force the LVM creation
10068
      for new_lv in new_lvs:
10069
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10070
                        _GetInstanceInfoText(self.instance), False)
10071

    
10072
    return iv_names
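    # Editorial note (sketch of the structure built above): the returned
    # mapping is roughly
    #   {iv_name: (drbd_dev, [old_data_lv, old_meta_lv],
    #              [new_data_lv, new_meta_lv])}
    # and is later consumed by the detach/rename/attach loop, _CheckDevices
    # and _RemoveOldStorage.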
10073

    
10074
  def _CheckDevices(self, node_name, iv_names):
10075
    for name, (dev, _, _) in iv_names.iteritems():
10076
      self.cfg.SetDiskID(dev, node_name)
10077

    
10078
      result = self.rpc.call_blockdev_find(node_name, dev)
10079

    
10080
      msg = result.fail_msg
10081
      if msg or not result.payload:
10082
        if not msg:
10083
          msg = "disk not found"
10084
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
10085
                                 (name, msg))
10086

    
10087
      if result.payload.is_degraded:
10088
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
10089

    
10090
  def _RemoveOldStorage(self, node_name, iv_names):
10091
    for name, (_, old_lvs, _) in iv_names.iteritems():
10092
      self.lu.LogInfo("Remove logical volumes for %s" % name)
10093

    
10094
      for lv in old_lvs:
10095
        self.cfg.SetDiskID(lv, node_name)
10096

    
10097
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10098
        if msg:
10099
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
10100
                             hint="remove unused LVs manually")
10101

    
10102
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10103
    """Replace a disk on the primary or secondary for DRBD 8.
10104

10105
    The algorithm for replace is quite complicated:
10106

10107
      1. for each disk to be replaced:
10108

10109
        1. create new LVs on the target node with unique names
10110
        1. detach old LVs from the drbd device
10111
        1. rename old LVs to name_replaced.<time_t>
10112
        1. rename new LVs to old LVs
10113
        1. attach the new LVs (with the old names now) to the drbd device
10114

10115
      1. wait for sync across all devices
10116

10117
      1. for each modified disk:
10118

10119
        1. remove old LVs (which have the name name_replaced.<time_t>)
10120

10121
    Failures are not very well handled.
10122

10123
    """
10124
    steps_total = 6
10125

    
10126
    # Step: check device activation
10127
    self.lu.LogStep(1, steps_total, "Check device existence")
10128
    self._CheckDisksExistence([self.other_node, self.target_node])
10129
    self._CheckVolumeGroup([self.target_node, self.other_node])
10130

    
10131
    # Step: check other node consistency
10132
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10133
    self._CheckDisksConsistency(self.other_node,
10134
                                self.other_node == self.instance.primary_node,
10135
                                False)
10136

    
10137
    # Step: create new storage
10138
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10139
    iv_names = self._CreateNewStorage(self.target_node)
10140

    
10141
    # Step: for each lv, detach+rename*2+attach
10142
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10143
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10144
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10145

    
10146
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10147
                                                     old_lvs)
10148
      result.Raise("Can't detach drbd from local storage on node"
10149
                   " %s for device %s" % (self.target_node, dev.iv_name))
10150
      #dev.children = []
10151
      #cfg.Update(instance)
10152

    
10153
      # ok, we created the new LVs, so now we know we have the needed
10154
      # storage; as such, we proceed on the target node to rename
10155
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10156
      # using the assumption that logical_id == physical_id (which in
10157
      # turn is the unique_id on that node)
10158

    
10159
      # FIXME(iustin): use a better name for the replaced LVs
10160
      temp_suffix = int(time.time())
10161
      ren_fn = lambda d, suff: (d.physical_id[0],
10162
                                d.physical_id[1] + "_replaced-%s" % suff)
10163

    
10164
      # Build the rename list based on what LVs exist on the node
10165
      rename_old_to_new = []
10166
      for to_ren in old_lvs:
10167
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10168
        if not result.fail_msg and result.payload:
10169
          # device exists
10170
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10171

    
10172
      self.lu.LogInfo("Renaming the old LVs on the target node")
10173
      result = self.rpc.call_blockdev_rename(self.target_node,
10174
                                             rename_old_to_new)
10175
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10176

    
10177
      # Now we rename the new LVs to the old LVs
10178
      self.lu.LogInfo("Renaming the new LVs on the target node")
10179
      rename_new_to_old = [(new, old.physical_id)
10180
                           for old, new in zip(old_lvs, new_lvs)]
10181
      result = self.rpc.call_blockdev_rename(self.target_node,
10182
                                             rename_new_to_old)
10183
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10184

    
10185
      # Intermediate steps of in memory modifications
10186
      for old, new in zip(old_lvs, new_lvs):
10187
        new.logical_id = old.logical_id
10188
        self.cfg.SetDiskID(new, self.target_node)
10189

    
10190
      # We need to modify old_lvs so that removal later removes the
10191
      # right LVs, not the newly added ones; note that old_lvs is a
10192
      # copy here
10193
      for disk in old_lvs:
10194
        disk.logical_id = ren_fn(disk, temp_suffix)
10195
        self.cfg.SetDiskID(disk, self.target_node)
10196

    
10197
      # Now that the new lvs have the old name, we can add them to the device
10198
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10199
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10200
                                                  new_lvs)
10201
      msg = result.fail_msg
10202
      if msg:
10203
        for new_lv in new_lvs:
10204
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10205
                                               new_lv).fail_msg
10206
          if msg2:
10207
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10208
                               hint=("cleanup manually the unused logical"
10209
                                     "volumes"))
10210
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10211

    
10212
    cstep = itertools.count(5)
10213

    
10214
    if self.early_release:
10215
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10216
      self._RemoveOldStorage(self.target_node, iv_names)
10217
      # TODO: Check if releasing locks early still makes sense
10218
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10219
    else:
10220
      # Release all resource locks except those used by the instance
10221
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10222
                    keep=self.node_secondary_ip.keys())
10223

    
10224
    # Release all node locks while waiting for sync
10225
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10226

    
10227
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10228
    # shutdown in the caller into consideration.
10229

    
10230
    # Wait for sync
10231
    # This can fail as the old devices are degraded and _WaitForSync
10232
    # does a combined result over all disks, so we don't check its return value
10233
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10234
    _WaitForSync(self.lu, self.instance)
10235

    
10236
    # Check all devices manually
10237
    self._CheckDevices(self.instance.primary_node, iv_names)
10238

    
10239
    # Step: remove old storage
10240
    if not self.early_release:
10241
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10242
      self._RemoveOldStorage(self.target_node, iv_names)
10243

    
10244
  def _ExecDrbd8Secondary(self, feedback_fn):
10245
    """Replace the secondary node for DRBD 8.
10246

10247
    The algorithm for replace is quite complicated:
10248
      - for all disks of the instance:
10249
        - create new LVs on the new node with same names
10250
        - shutdown the drbd device on the old secondary
10251
        - disconnect the drbd network on the primary
10252
        - create the drbd device on the new secondary
10253
        - network attach the drbd on the primary, using an artifice:
10254
          the drbd code for Attach() will connect to the network if it
10255
          finds a device which is connected to the good local disks but
10256
          not network enabled
10257
      - wait for sync across all devices
10258
      - remove all disks from the old secondary
10259

10260
    Failures are not very well handled.
10261

10262
    """
10263
    steps_total = 6
10264

    
10265
    pnode = self.instance.primary_node
10266

    
10267
    # Step: check device activation
10268
    self.lu.LogStep(1, steps_total, "Check device existence")
10269
    self._CheckDisksExistence([self.instance.primary_node])
10270
    self._CheckVolumeGroup([self.instance.primary_node])
10271

    
10272
    # Step: check other node consistency
10273
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10274
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10275

    
10276
    # Step: create new storage
10277
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10278
    for idx, dev in enumerate(self.instance.disks):
10279
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10280
                      (self.new_node, idx))
10281
      # we pass force_create=True to force LVM creation
10282
      for new_lv in dev.children:
10283
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10284
                        _GetInstanceInfoText(self.instance), False)
10285

    
10286
    # Step 4: drbd minors and drbd setup changes
10287
    # after this, we must manually remove the drbd minors on both the
10288
    # error and the success paths
10289
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10290
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10291
                                         for dev in self.instance.disks],
10292
                                        self.instance.name)
10293
    logging.debug("Allocated minors %r", minors)
10294

    
10295
    iv_names = {}
10296
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10297
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10298
                      (self.new_node, idx))
10299
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later activation in step 4
10303
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10304
      if self.instance.primary_node == o_node1:
10305
        p_minor = o_minor1
10306
      else:
10307
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10308
        p_minor = o_minor2
10309

    
10310
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10311
                      p_minor, new_minor, o_secret)
10312
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10313
                    p_minor, new_minor, o_secret)
10314

    
10315
      iv_names[idx] = (dev, dev.children, new_net_id)
10316
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10317
                    new_net_id)
10318
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10319
                              logical_id=new_alone_id,
10320
                              children=dev.children,
10321
                              size=dev.size)
10322
      try:
10323
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10324
                              _GetInstanceInfoText(self.instance), False)
10325
      except errors.GenericError:
10326
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10327
        raise
10328

    
10329
    # We have new devices, shutdown the drbd on the old secondary
10330
    for idx, dev in enumerate(self.instance.disks):
10331
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10332
      self.cfg.SetDiskID(dev, self.target_node)
10333
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10334
      if msg:
10335
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10336
                           "node: %s" % (idx, msg),
10337
                           hint=("Please cleanup this device manually as"
10338
                                 " soon as possible"))
10339

    
10340
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10341
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10342
                                               self.instance.disks)[pnode]
10343

    
10344
    msg = result.fail_msg
10345
    if msg:
10346
      # detaches didn't succeed (unlikely)
10347
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10348
      raise errors.OpExecError("Can't detach the disks from the network on"
10349
                               " old node: %s" % (msg,))
10350

    
10351
    # if we managed to detach at least one, we update all the disks of
10352
    # the instance to point to the new secondary
10353
    self.lu.LogInfo("Updating instance configuration")
10354
    for dev, _, new_logical_id in iv_names.itervalues():
10355
      dev.logical_id = new_logical_id
10356
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10357

    
10358
    self.cfg.Update(self.instance, feedback_fn)
10359

    
10360
    # Release all node locks (the configuration has been updated)
10361
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10362

    
10363
    # and now perform the drbd attach
10364
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10365
                    " (standalone => connected)")
10366
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10367
                                            self.new_node],
10368
                                           self.node_secondary_ip,
10369
                                           self.instance.disks,
10370
                                           self.instance.name,
10371
                                           False)
10372
    for to_node, to_result in result.items():
10373
      msg = to_result.fail_msg
10374
      if msg:
10375
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10376
                           to_node, msg,
10377
                           hint=("please do a gnt-instance info to see the"
10378
                                 " status of disks"))
10379

    
10380
    cstep = itertools.count(5)
10381

    
10382
    if self.early_release:
10383
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10384
      self._RemoveOldStorage(self.target_node, iv_names)
10385
      # TODO: Check if releasing locks early still makes sense
10386
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10387
    else:
10388
      # Release all resource locks except those used by the instance
10389
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10390
                    keep=self.node_secondary_ip.keys())
10391

    
10392
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10393
    # shutdown in the caller into consideration.
10394

    
10395
    # Wait for sync
10396
    # This can fail as the old devices are degraded and _WaitForSync
10397
    # does a combined result over all disks, so we don't check its return value
10398
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10399
    _WaitForSync(self.lu, self.instance)
10400

    
10401
    # Check all devices manually
10402
    self._CheckDevices(self.instance.primary_node, iv_names)
10403

    
10404
    # Step: remove old storage
10405
    if not self.early_release:
10406
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10407
      self._RemoveOldStorage(self.target_node, iv_names)
10408

    
10409

    
10410
class LURepairNodeStorage(NoHooksLU):
10411
  """Repairs the volume group on a node.
10412

10413
  """
10414
  REQ_BGL = False
10415

    
10416
  def CheckArguments(self):
10417
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10418

    
10419
    storage_type = self.op.storage_type
10420

    
10421
    if (constants.SO_FIX_CONSISTENCY not in
10422
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10423
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10424
                                 " repaired" % storage_type,
10425
                                 errors.ECODE_INVAL)
10426

    
10427
  def ExpandNames(self):
10428
    self.needed_locks = {
10429
      locking.LEVEL_NODE: [self.op.node_name],
10430
      }
10431

    
10432
  def _CheckFaultyDisks(self, instance, node_name):
10433
    """Ensure faulty disks abort the opcode or at least warn."""
10434
    try:
10435
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10436
                                  node_name, True):
10437
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10438
                                   " node '%s'" % (instance.name, node_name),
10439
                                   errors.ECODE_STATE)
10440
    except errors.OpPrereqError, err:
10441
      if self.op.ignore_consistency:
10442
        self.proc.LogWarning(str(err.args[0]))
10443
      else:
10444
        raise
10445

    
10446
  def CheckPrereq(self):
10447
    """Check prerequisites.
10448

10449
    """
10450
    # Check whether any instance on this node has faulty disks
10451
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10452
      if inst.admin_state != constants.ADMINST_UP:
10453
        continue
10454
      check_nodes = set(inst.all_nodes)
10455
      check_nodes.discard(self.op.node_name)
10456
      for inst_node_name in check_nodes:
10457
        self._CheckFaultyDisks(inst, inst_node_name)
10458

    
10459
  def Exec(self, feedback_fn):
10460
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10461
                (self.op.name, self.op.node_name))
10462

    
10463
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10464
    result = self.rpc.call_storage_execute(self.op.node_name,
10465
                                           self.op.storage_type, st_args,
10466
                                           self.op.name,
10467
                                           constants.SO_FIX_CONSISTENCY)
10468
    result.Raise("Failed to repair storage unit '%s' on %s" %
10469
                 (self.op.name, self.op.node_name))
10470

    
10471

    
10472
class LUNodeEvacuate(NoHooksLU):
10473
  """Evacuates instances off a list of nodes.
10474

10475
  """
10476
  REQ_BGL = False
10477

    
10478
  def CheckArguments(self):
10479
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10480

    
10481
  def ExpandNames(self):
10482
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10483

    
10484
    if self.op.remote_node is not None:
10485
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10486
      assert self.op.remote_node
10487

    
10488
      if self.op.remote_node == self.op.node_name:
10489
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10490
                                   " secondary node", errors.ECODE_INVAL)
10491

    
10492
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10493
        raise errors.OpPrereqError("Without the use of an iallocator only"
10494
                                   " secondary instances can be evacuated",
10495
                                   errors.ECODE_INVAL)
10496

    
10497
    # Declare locks
10498
    self.share_locks = _ShareAll()
10499
    self.needed_locks = {
10500
      locking.LEVEL_INSTANCE: [],
10501
      locking.LEVEL_NODEGROUP: [],
10502
      locking.LEVEL_NODE: [],
10503
      }
10504

    
10505
    if self.op.remote_node is None:
10506
      # Iallocator will choose any node(s) in the same group
10507
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10508
    else:
10509
      group_nodes = frozenset([self.op.remote_node])
10510

    
10511
    # Determine nodes to be locked
10512
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10513

    
10514
  def _DetermineInstances(self):
10515
    """Builds list of instances to operate on.
10516

10517
    """
10518
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10519

    
10520
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10521
      # Primary instances only
10522
      inst_fn = _GetNodePrimaryInstances
10523
      assert self.op.remote_node is None, \
10524
        "Evacuating primary instances requires iallocator"
10525
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10526
      # Secondary instances only
10527
      inst_fn = _GetNodeSecondaryInstances
10528
    else:
10529
      # All instances
10530
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10531
      inst_fn = _GetNodeInstances
10532

    
10533
    return inst_fn(self.cfg, self.op.node_name)
10534

    
10535
  def DeclareLocks(self, level):
10536
    if level == locking.LEVEL_INSTANCE:
10537
      # Lock instances optimistically, needs verification once node and group
10538
      # locks have been acquired
10539
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10540
        set(i.name for i in self._DetermineInstances())
10541

    
10542
    elif level == locking.LEVEL_NODEGROUP:
10543
      # Lock node groups optimistically, needs verification once nodes have
10544
      # been acquired
10545
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10546
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10547

    
10548
    elif level == locking.LEVEL_NODE:
10549
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10550

    
10551
  def CheckPrereq(self):
10552
    # Verify locks
10553
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10554
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10555
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10556

    
10557
    assert owned_nodes == self.lock_nodes
10558

    
10559
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10560
    if owned_groups != wanted_groups:
10561
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10562
                               " current groups are '%s', used to be '%s'" %
10563
                               (utils.CommaJoin(wanted_groups),
10564
                                utils.CommaJoin(owned_groups)))
10565

    
10566
    # Determine affected instances
10567
    self.instances = self._DetermineInstances()
10568
    self.instance_names = [i.name for i in self.instances]
10569

    
10570
    if set(self.instance_names) != owned_instances:
10571
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10572
                               " were acquired, current instances are '%s',"
10573
                               " used to be '%s'" %
10574
                               (self.op.node_name,
10575
                                utils.CommaJoin(self.instance_names),
10576
                                utils.CommaJoin(owned_instances)))
10577

    
10578
    if self.instance_names:
10579
      self.LogInfo("Evacuating instances from node '%s': %s",
10580
                   self.op.node_name,
10581
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10582
    else:
10583
      self.LogInfo("No instances to evacuate from node '%s'",
10584
                   self.op.node_name)
10585

    
10586
    if self.op.remote_node is not None:
10587
      for i in self.instances:
10588
        if i.primary_node == self.op.remote_node:
10589
          raise errors.OpPrereqError("Node %s is the primary node of"
10590
                                     " instance %s, cannot use it as"
10591
                                     " secondary" %
10592
                                     (self.op.remote_node, i.name),
10593
                                     errors.ECODE_INVAL)
10594

    
10595
  def Exec(self, feedback_fn):
10596
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10597

    
10598
    if not self.instance_names:
10599
      # No instances to evacuate
10600
      jobs = []
10601

    
10602
    elif self.op.iallocator is not None:
10603
      # TODO: Implement relocation to other group
10604
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10605
                       evac_mode=self.op.mode,
10606
                       instances=list(self.instance_names))
10607

    
10608
      ial.Run(self.op.iallocator)
10609

    
10610
      if not ial.success:
10611
        raise errors.OpPrereqError("Can't compute node evacuation using"
10612
                                   " iallocator '%s': %s" %
10613
                                   (self.op.iallocator, ial.info),
10614
                                   errors.ECODE_NORES)
10615

    
10616
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10617

    
10618
    elif self.op.remote_node is not None:
10619
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10620
      jobs = [
10621
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10622
                                        remote_node=self.op.remote_node,
10623
                                        disks=[],
10624
                                        mode=constants.REPLACE_DISK_CHG,
10625
                                        early_release=self.op.early_release)]
10626
        for instance_name in self.instance_names
10627
        ]
10628

    
10629
    else:
10630
      raise errors.ProgrammerError("No iallocator or remote node")
10631

    
10632
    return ResultWithJobs(jobs)
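    # Editorial note: C{jobs} is a list of job definitions, each itself a list
    # of opcodes; in the remote-node branch above every job holds a single
    # OpInstanceReplaceDisks opcode changing the secondary of one instance.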


def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
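# Editorial usage sketch (not from the original source): _LoadNodeEvacResult
# below curries this helper and maps it over freshly loaded opcodes, roughly
#   set_er = compat.partial(_SetOpEarlyRelease, True)
#   ops = [set_er(op) for op in map(opcodes.OpCode.LoadOpCode, op_dicts)]
# where op_dicts stands for a hypothetical list of serialized opcodes; opcodes
# without an early_release slot are returned unchanged.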


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
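# Example values (illustrative only, names are made up; assumes
# utils.CommaJoin joins with ", "):
#   _NodeEvacDest(True, "group1", ["node2", "node3"])  -> "node2, node3"
#   _NodeEvacDest(False, "group1", ["node2", "node3"]) -> "group1"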


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]
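# Editorial note: a sketch of the expected C{alloc_result} shape, derived from
# the unpacking above (not an authoritative specification):
#   moved  - [(instance_name, target_group, [node_name, ...]), ...]
#   failed - [(instance_name, failure_reason), ...]
#   jobs   - [[opcode_dict, ...], ...]   (each inner list becomes one job)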


class LUInstanceGrowDisk(LogicalUnit):
10693
  """Grow a disk of an instance.
10694

10695
  """
10696
  HPATH = "disk-grow"
10697
  HTYPE = constants.HTYPE_INSTANCE
10698
  REQ_BGL = False
10699

    
10700
  def ExpandNames(self):
10701
    self._ExpandAndLockInstance()
10702
    self.needed_locks[locking.LEVEL_NODE] = []
10703
    self.needed_locks[locking.LEVEL_NODE_RES] = []
10704
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10705

    
10706
  def DeclareLocks(self, level):
10707
    if level == locking.LEVEL_NODE:
10708
      self._LockInstancesNodes()
10709
    elif level == locking.LEVEL_NODE_RES:
10710
      # Copy node locks
10711
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10712
        self.needed_locks[locking.LEVEL_NODE][:]
10713

    
10714
  def BuildHooksEnv(self):
10715
    """Build hooks env.
10716

10717
    This runs on the master, the primary and all the secondaries.
10718

10719
    """
10720
    env = {
10721
      "DISK": self.op.disk,
10722
      "AMOUNT": self.op.amount,
10723
      }
10724
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10725
    return env
10726

    
10727
  def BuildHooksNodes(self):
10728
    """Build hooks nodes.
10729

10730
    """
10731
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10732
    return (nl, nl)
10733

    
10734
  def CheckPrereq(self):
10735
    """Check prerequisites.
10736

10737
    This checks that the instance is in the cluster.
10738

10739
    """
10740
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10741
    assert instance is not None, \
10742
      "Cannot retrieve locked instance %s" % self.op.instance_name
10743
    nodenames = list(instance.all_nodes)
10744
    for node in nodenames:
10745
      _CheckNodeOnline(self, node)
10746

    
10747
    self.instance = instance
10748

    
10749
    if instance.disk_template not in constants.DTS_GROWABLE:
10750
      raise errors.OpPrereqError("Instance's disk layout does not support"
10751
                                 " growing", errors.ECODE_INVAL)
10752

    
10753
    self.disk = instance.FindDisk(self.op.disk)
10754

    
10755
    if instance.disk_template not in (constants.DT_FILE,
10756
                                      constants.DT_SHARED_FILE):
10757
      # TODO: check the free disk space for file, when that feature will be
10758
      # supported
10759
      _CheckNodesFreeDiskPerVG(self, nodenames,
10760
                               self.disk.ComputeGrowth(self.op.amount))
10761

    
10762
  def Exec(self, feedback_fn):
10763
    """Execute disk grow.
10764

10765
    """
10766
    instance = self.instance
10767
    disk = self.disk
10768

    
10769
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10770
    assert (self.owned_locks(locking.LEVEL_NODE) ==
10771
            self.owned_locks(locking.LEVEL_NODE_RES))
10772

    
10773
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10774
    if not disks_ok:
10775
      raise errors.OpExecError("Cannot activate block device to grow")
10776

    
10777
    feedback_fn("Growing disk %s of instance '%s' by %s" %
10778
                (self.op.disk, instance.name,
10779
                 utils.FormatUnit(self.op.amount, "h")))
10780

    
10781
    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed on node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed; time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed on node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
10827
  """Query runtime instance data.
10828

10829
  """
10830
  REQ_BGL = False
10831

    
10832
  def ExpandNames(self):
10833
    self.needed_locks = {}
10834

    
10835
    # Use locking if requested or when non-static information is wanted
10836
    if not (self.op.static or self.op.use_locking):
10837
      self.LogWarning("Non-static data requested, locks need to be acquired")
10838
      self.op.use_locking = True
10839

    
10840
    if self.op.instances or not self.op.use_locking:
10841
      # Expand instance names right here
10842
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10843
    else:
10844
      # Will use acquired locks
10845
      self.wanted_names = None
10846

    
10847
    if self.op.use_locking:
10848
      self.share_locks = _ShareAll()
10849

    
10850
      if self.wanted_names is None:
10851
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10852
      else:
10853
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10854

    
10855
      self.needed_locks[locking.LEVEL_NODE] = []
10856
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10857

    
10858
  def DeclareLocks(self, level):
10859
    if self.op.use_locking and level == locking.LEVEL_NODE:
10860
      self._LockInstancesNodes()
10861

    
10862
  def CheckPrereq(self):
10863
    """Check prerequisites.
10864

10865
    This only checks the optional instance list against the existing names.
10866

10867
    """
10868
    if self.wanted_names is None:
10869
      assert self.op.use_locking, "Locking was not used"
10870
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10871

    
10872
    self.wanted_instances = \
10873
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10874

    
10875
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10876
    """Returns the status of a block device
10877

10878
    """
10879
    if self.op.static or not node:
10880
      return None
10881

    
10882
    self.cfg.SetDiskID(dev, node)
10883

    
10884
    result = self.rpc.call_blockdev_find(node, dev)
10885
    if result.offline:
10886
      return None
10887

    
10888
    result.Raise("Can't compute disk status for %s" % instance_name)
10889

    
10890
    status = result.payload
10891
    if status is None:
10892
      return None
10893

    
10894
    return (status.dev_path, status.major, status.minor,
10895
            status.sync_percent, status.estimated_time,
10896
            status.is_degraded, status.ldisk_status)
10897

    
10898
  def _ComputeDiskStatus(self, instance, snode, dev):
10899
    """Compute block device status.
10900

10901
    """
10902
    if dev.dev_type in constants.LDS_DRBD:
10903
      # we change the snode then (otherwise we use the one passed in)
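      # (for DRBD the logical_id starts with the two node names, so the
      # secondary is simply whichever end is not the primary node)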
10904
      if dev.logical_id[0] == instance.primary_node:
10905
        snode = dev.logical_id[1]
10906
      else:
10907
        snode = dev.logical_id[0]
10908

    
10909
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10910
                                              instance.name, dev)
10911
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10912

    
10913
    if dev.children:
10914
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10915
                                        instance, snode),
10916
                         dev.children)
10917
    else:
10918
      dev_children = []
10919

    
10920
    return {
10921
      "iv_name": dev.iv_name,
10922
      "dev_type": dev.dev_type,
10923
      "logical_id": dev.logical_id,
10924
      "physical_id": dev.physical_id,
10925
      "pstatus": dev_pstatus,
10926
      "sstatus": dev_sstatus,
10927
      "children": dev_children,
10928
      "mode": dev.mode,
10929
      "size": dev.size,
10930
      }
10931

    
10932
  def Exec(self, feedback_fn):
10933
    """Gather and return data"""
10934
    result = {}
10935

    
10936
    cluster = self.cfg.GetClusterInfo()
10937

    
10938
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10939
                                          for i in self.wanted_instances)
10940
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10941
      if self.op.static or pnode.offline:
10942
        remote_state = None
10943
        if pnode.offline:
10944
          self.LogWarning("Primary node %s is marked offline, returning static"
10945
                          " information only for instance %s" %
10946
                          (pnode.name, instance.name))
10947
      else:
10948
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10949
                                                  instance.name,
10950
                                                  instance.hypervisor)
10951
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10952
        remote_info = remote_info.payload
10953
        if remote_info and "state" in remote_info:
10954
          remote_state = "up"
10955
        else:
10956
          if instance.admin_state == constants.ADMINST_UP:
10957
            remote_state = "down"
10958
          else:
10959
            remote_state = instance.admin_state
10960

    
10961
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10962
                  instance.disks)
10963

    
10964
      result[instance.name] = {
10965
        "name": instance.name,
10966
        "config_state": instance.admin_state,
10967
        "run_state": remote_state,
10968
        "pnode": instance.primary_node,
10969
        "snodes": instance.secondary_nodes,
10970
        "os": instance.os,
10971
        # this happens to be the same format used for hooks
10972
        "nics": _NICListToTuple(self, instance.nics),
10973
        "disk_template": instance.disk_template,
10974
        "disks": disks,
10975
        "hypervisor": instance.hypervisor,
10976
        "network_port": instance.network_port,
10977
        "hv_instance": instance.hvparams,
10978
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10979
        "be_instance": instance.beparams,
10980
        "be_actual": cluster.FillBE(instance),
10981
        "os_instance": instance.osparams,
10982
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10983
        "serial_no": instance.serial_no,
10984
        "mtime": instance.mtime,
10985
        "ctime": instance.ctime,
10986
        "uuid": instance.uuid,
10987
        }
10988

    
10989
    return result
10990

    
10991

    
10992
class LUInstanceSetParams(LogicalUnit):
10993
  """Modifies an instances's parameters.
10994

10995
  """
10996
  HPATH = "instance-modify"
10997
  HTYPE = constants.HTYPE_INSTANCE
10998
  REQ_BGL = False
10999

    
11000
  def CheckArguments(self):
11001
    if not (self.op.nics or self.op.disks or self.op.disk_template or
11002
            self.op.hvparams or self.op.beparams or self.op.os_name or
11003
            self.op.online_inst or self.op.offline_inst):
11004
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11005

    
11006
    if self.op.hvparams:
11007
      _CheckGlobalHvParams(self.op.hvparams)
11008

    
11009
    # Disk validation
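    # self.op.disks is a list of (disk_op, disk_dict) pairs; disk_op is either
    # DDM_ADD, DDM_REMOVE or the index of an existing disk to modify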
11010
    disk_addremove = 0
11011
    for disk_op, disk_dict in self.op.disks:
11012
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11013
      if disk_op == constants.DDM_REMOVE:
11014
        disk_addremove += 1
11015
        continue
11016
      elif disk_op == constants.DDM_ADD:
11017
        disk_addremove += 1
11018
      else:
11019
        if not isinstance(disk_op, int):
11020
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11021
        if not isinstance(disk_dict, dict):
11022
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11023
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11024

    
11025
      if disk_op == constants.DDM_ADD:
11026
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11027
        if mode not in constants.DISK_ACCESS_SET:
11028
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11029
                                     errors.ECODE_INVAL)
11030
        size = disk_dict.get(constants.IDISK_SIZE, None)
11031
        if size is None:
11032
          raise errors.OpPrereqError("Required disk parameter size missing",
11033
                                     errors.ECODE_INVAL)
11034
        try:
11035
          size = int(size)
11036
        except (TypeError, ValueError), err:
11037
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11038
                                     str(err), errors.ECODE_INVAL)
11039
        disk_dict[constants.IDISK_SIZE] = size
11040
      else:
11041
        # modification of disk
11042
        if constants.IDISK_SIZE in disk_dict:
11043
          raise errors.OpPrereqError("Disk size change not possible, use"
11044
                                     " grow-disk", errors.ECODE_INVAL)
11045

    
11046
    if disk_addremove > 1:
11047
      raise errors.OpPrereqError("Only one disk add or remove operation"
11048
                                 " supported at a time", errors.ECODE_INVAL)
11049

    
11050
    if self.op.disks and self.op.disk_template is not None:
11051
      raise errors.OpPrereqError("Disk template conversion and other disk"
11052
                                 " changes not supported at the same time",
11053
                                 errors.ECODE_INVAL)
11054

    
11055
    if (self.op.disk_template and
11056
        self.op.disk_template in constants.DTS_INT_MIRROR and
11057
        self.op.remote_node is None):
11058
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
11059
                                 " one requires specifying a secondary node",
11060
                                 errors.ECODE_INVAL)
11061

    
11062
    # NIC validation
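    # self.op.nics uses the same (op, params) convention as self.op.disks above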
11063
    nic_addremove = 0
11064
    for nic_op, nic_dict in self.op.nics:
11065
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11066
      if nic_op == constants.DDM_REMOVE:
11067
        nic_addremove += 1
11068
        continue
11069
      elif nic_op == constants.DDM_ADD:
11070
        nic_addremove += 1
11071
      else:
11072
        if not isinstance(nic_op, int):
11073
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11074
        if not isinstance(nic_dict, dict):
11075
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11076
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11077

    
11078
      # nic_dict should be a dict
11079
      nic_ip = nic_dict.get(constants.INIC_IP, None)
11080
      if nic_ip is not None:
11081
        if nic_ip.lower() == constants.VALUE_NONE:
11082
          nic_dict[constants.INIC_IP] = None
11083
        else:
11084
          if not netutils.IPAddress.IsValid(nic_ip):
11085
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11086
                                       errors.ECODE_INVAL)
11087

    
11088
      nic_bridge = nic_dict.get("bridge", None)
11089
      nic_link = nic_dict.get(constants.INIC_LINK, None)
11090
      if nic_bridge and nic_link:
11091
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11092
                                   " at the same time", errors.ECODE_INVAL)
11093
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11094
        nic_dict["bridge"] = None
11095
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11096
        nic_dict[constants.INIC_LINK] = None
11097

    
11098
      if nic_op == constants.DDM_ADD:
11099
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
11100
        if nic_mac is None:
11101
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11102

    
11103
      if constants.INIC_MAC in nic_dict:
11104
        nic_mac = nic_dict[constants.INIC_MAC]
11105
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11106
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11107

    
11108
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11109
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11110
                                     " modifying an existing nic",
11111
                                     errors.ECODE_INVAL)
11112

    
11113
    if nic_addremove > 1:
11114
      raise errors.OpPrereqError("Only one NIC add or remove operation"
11115
                                 " supported at a time", errors.ECODE_INVAL)
11116

    
11117
  def ExpandNames(self):
11118
    self._ExpandAndLockInstance()
11119
    # Can't even acquire node locks in shared mode as upcoming changes in
11120
    # Ganeti 2.6 will start to modify the node object on disk conversion
11121
    self.needed_locks[locking.LEVEL_NODE] = []
11122
    self.needed_locks[locking.LEVEL_NODE_RES] = []
11123
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11124

    
11125
  def DeclareLocks(self, level):
11126
    if level == locking.LEVEL_NODE:
11127
      self._LockInstancesNodes()
11128
      if self.op.disk_template and self.op.remote_node:
11129
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11130
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11131
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11132
      # Copy node locks
11133
      self.needed_locks[locking.LEVEL_NODE_RES] = \
11134
        self.needed_locks[locking.LEVEL_NODE][:]
11135

    
11136
  def BuildHooksEnv(self):
11137
    """Build hooks env.
11138

11139
    This runs on the master, primary and secondaries.
11140

11141
    """
11142
    args = dict()
11143
    if constants.BE_MEMORY in self.be_new:
11144
      args["memory"] = self.be_new[constants.BE_MEMORY]
11145
    if constants.BE_VCPUS in self.be_new:
11146
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
11147
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11148
    # information at all.
11149
    if self.op.nics:
11150
      args["nics"] = []
11151
      nic_override = dict(self.op.nics)
11152
      for idx, nic in enumerate(self.instance.nics):
11153
        if idx in nic_override:
11154
          this_nic_override = nic_override[idx]
11155
        else:
11156
          this_nic_override = {}
11157
        if constants.INIC_IP in this_nic_override:
11158
          ip = this_nic_override[constants.INIC_IP]
11159
        else:
11160
          ip = nic.ip
11161
        if constants.INIC_MAC in this_nic_override:
11162
          mac = this_nic_override[constants.INIC_MAC]
11163
        else:
11164
          mac = nic.mac
11165
        if idx in self.nic_pnew:
11166
          nicparams = self.nic_pnew[idx]
11167
        else:
11168
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11169
        mode = nicparams[constants.NIC_MODE]
11170
        link = nicparams[constants.NIC_LINK]
11171
        args["nics"].append((ip, mac, mode, link))
11172
      if constants.DDM_ADD in nic_override:
11173
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11174
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11175
        nicparams = self.nic_pnew[constants.DDM_ADD]
11176
        mode = nicparams[constants.NIC_MODE]
11177
        link = nicparams[constants.NIC_LINK]
11178
        args["nics"].append((ip, mac, mode, link))
11179
      elif constants.DDM_REMOVE in nic_override:
11180
        del args["nics"][-1]
11181

    
11182
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11183
    if self.op.disk_template:
11184
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11185

    
11186
    return env
11187

    
11188
  def BuildHooksNodes(self):
11189
    """Build hooks nodes.
11190

11191
    """
11192
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11193
    return (nl, nl)
11194

    
11195
  def CheckPrereq(self):
11196
    """Check prerequisites.
11197

11198
    This checks the requested parameter changes against the current state of
    the instance and its nodes.
11199

11200
    """
11201
    # checking the new params on the primary/secondary nodes
11202

    
11203
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11204
    cluster = self.cluster = self.cfg.GetClusterInfo()
11205
    assert self.instance is not None, \
11206
      "Cannot retrieve locked instance %s" % self.op.instance_name
11207
    pnode = instance.primary_node
11208
    nodelist = list(instance.all_nodes)
11209

    
11210
    # OS change
11211
    if self.op.os_name and not self.op.force:
11212
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11213
                      self.op.force_variant)
11214
      instance_os = self.op.os_name
11215
    else:
11216
      instance_os = instance.os
11217

    
11218
    if self.op.disk_template:
11219
      if instance.disk_template == self.op.disk_template:
11220
        raise errors.OpPrereqError("Instance already has disk template %s" %
11221
                                   instance.disk_template, errors.ECODE_INVAL)
11222

    
11223
      if (instance.disk_template,
11224
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11225
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11226
                                   " %s to %s" % (instance.disk_template,
11227
                                                  self.op.disk_template),
11228
                                   errors.ECODE_INVAL)
11229
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11230
                          msg="cannot change disk template")
11231
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11232
        if self.op.remote_node == pnode:
11233
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11234
                                     " as the primary node of the instance" %
11235
                                     self.op.remote_node, errors.ECODE_STATE)
11236
        _CheckNodeOnline(self, self.op.remote_node)
11237
        _CheckNodeNotDrained(self, self.op.remote_node)
11238
        # FIXME: here we assume that the old instance type is DT_PLAIN
11239
        assert instance.disk_template == constants.DT_PLAIN
11240
        disks = [{constants.IDISK_SIZE: d.size,
11241
                  constants.IDISK_VG: d.logical_id[0]}
11242
                 for d in instance.disks]
11243
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11244
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11245

    
11246
    # hvparams processing
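    # (merge the requested changes into the instance's own hvparams dict, then
    # validate the fully filled result both locally and on all instance nodes)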
11247
    if self.op.hvparams:
11248
      hv_type = instance.hypervisor
11249
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11250
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11251
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11252

    
11253
      # local check
11254
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11255
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11256
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11257
      self.hv_inst = i_hvdict # the new dict (without defaults)
11258
    else:
11259
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11260
                                              instance.hvparams)
11261
      self.hv_new = self.hv_inst = {}
11262

    
11263
    # beparams processing
11264
    if self.op.beparams:
11265
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11266
                                   use_none=True)
11267
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11268
      be_new = cluster.SimpleFillBE(i_bedict)
11269
      self.be_proposed = self.be_new = be_new # the new actual values
11270
      self.be_inst = i_bedict # the new dict (without defaults)
11271
    else:
11272
      self.be_new = self.be_inst = {}
11273
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11274
    be_old = cluster.FillBE(instance)
11275

    
11276
    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
11279
    if (constants.BE_VCPUS in self.be_proposed and
11280
        constants.HV_CPU_MASK in self.hv_proposed):
11281
      cpu_list = \
11282
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11283
      # Verify mask is consistent with number of vCPUs. Can skip this
11284
      # test if only 1 entry in the CPU mask, which means same mask
11285
      # is applied to all vCPUs.
11286
      if (len(cpu_list) > 1 and
11287
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11288
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11289
                                   " CPU mask [%s]" %
11290
                                   (self.be_proposed[constants.BE_VCPUS],
11291
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11292
                                   errors.ECODE_INVAL)
11293

    
11294
      # Only perform this test if a new CPU mask is given
11295
      if constants.HV_CPU_MASK in self.hv_new:
11296
        # Calculate the largest CPU number requested
11297
        max_requested_cpu = max(map(max, cpu_list))
11298
        # Check that all of the instance's nodes have enough physical CPUs to
11299
        # satisfy the requested CPU mask
11300
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11301
                                max_requested_cpu + 1, instance.hypervisor)
11302

    
11303
    # osparams processing
11304
    if self.op.osparams:
11305
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11306
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11307
      self.os_inst = i_osdict # the new dict (without defaults)
11308
    else:
11309
      self.os_inst = {}
11310

    
11311
    self.warn = []
11312

    
11313
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11314
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11315
      mem_check_list = [pnode]
11316
      if be_new[constants.BE_AUTO_BALANCE]:
11317
        # either we changed auto_balance to yes or it was from before
11318
        mem_check_list.extend(instance.secondary_nodes)
11319
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11320
                                                  instance.hypervisor)
11321
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11322
                                         instance.hypervisor)
11323
      pninfo = nodeinfo[pnode]
11324
      msg = pninfo.fail_msg
11325
      if msg:
11326
        # Assume the primary node is unreachable and go ahead
11327
        self.warn.append("Can't get info from primary node %s: %s" %
11328
                         (pnode, msg))
11329
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11330
        self.warn.append("Node data from primary node %s doesn't contain"
11331
                         " free memory information" % pnode)
11332
      elif instance_info.fail_msg:
11333
        self.warn.append("Can't get instance runtime information: %s" %
11334
                        instance_info.fail_msg)
11335
      else:
11336
        if instance_info.payload:
11337
          current_mem = int(instance_info.payload["memory"])
11338
        else:
11339
          # Assume instance not running
11340
          # (there is a slight race condition here, but it's not very probable,
11341
          # and we have no other way to check)
11342
          current_mem = 0
11343
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11344
                    pninfo.payload["memory_free"])
11345
        if miss_mem > 0:
11346
          raise errors.OpPrereqError("This change will prevent the instance"
11347
                                     " from starting, due to %d MB of memory"
11348
                                     " missing on its primary node" % miss_mem,
11349
                                     errors.ECODE_NORES)
11350

    
11351
      if be_new[constants.BE_AUTO_BALANCE]:
11352
        for node, nres in nodeinfo.items():
11353
          if node not in instance.secondary_nodes:
11354
            continue
11355
          nres.Raise("Can't get info from secondary node %s" % node,
11356
                     prereq=True, ecode=errors.ECODE_STATE)
11357
          if not isinstance(nres.payload.get("memory_free", None), int):
11358
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11359
                                       " memory information" % node,
11360
                                       errors.ECODE_STATE)
11361
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11362
            raise errors.OpPrereqError("This change will prevent the instance"
11363
                                       " from failover to its secondary node"
11364
                                       " %s, due to not enough memory" % node,
11365
                                       errors.ECODE_STATE)
11366

    
11367
    # NIC processing
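    # (for every requested change compute both the instance-level parameters
    # and the cluster-filled ones; they are kept in nic_pinst/nic_pnew and
    # used later in BuildHooksEnv and Exec)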
11368
    self.nic_pnew = {}
11369
    self.nic_pinst = {}
11370
    for nic_op, nic_dict in self.op.nics:
11371
      if nic_op == constants.DDM_REMOVE:
11372
        if not instance.nics:
11373
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11374
                                     errors.ECODE_INVAL)
11375
        continue
11376
      if nic_op != constants.DDM_ADD:
11377
        # an existing nic
11378
        if not instance.nics:
11379
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11380
                                     " no NICs" % nic_op,
11381
                                     errors.ECODE_INVAL)
11382
        if nic_op < 0 or nic_op >= len(instance.nics):
11383
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11384
                                     " are 0 to %d" %
11385
                                     (nic_op, len(instance.nics) - 1),
11386
                                     errors.ECODE_INVAL)
11387
        old_nic_params = instance.nics[nic_op].nicparams
11388
        old_nic_ip = instance.nics[nic_op].ip
11389
      else:
11390
        old_nic_params = {}
11391
        old_nic_ip = None
11392

    
11393
      update_params_dict = dict([(key, nic_dict[key])
11394
                                 for key in constants.NICS_PARAMETERS
11395
                                 if key in nic_dict])
11396

    
11397
      if "bridge" in nic_dict:
11398
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11399

    
11400
      new_nic_params = _GetUpdatedParams(old_nic_params,
11401
                                         update_params_dict)
11402
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11403
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11404
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11405
      self.nic_pinst[nic_op] = new_nic_params
11406
      self.nic_pnew[nic_op] = new_filled_nic_params
11407
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11408

    
11409
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11410
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11411
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11412
        if msg:
11413
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11414
          if self.op.force:
11415
            self.warn.append(msg)
11416
          else:
11417
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11418
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11419
        if constants.INIC_IP in nic_dict:
11420
          nic_ip = nic_dict[constants.INIC_IP]
11421
        else:
11422
          nic_ip = old_nic_ip
11423
        if nic_ip is None:
11424
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11425
                                     " on a routed nic", errors.ECODE_INVAL)
11426
      if constants.INIC_MAC in nic_dict:
11427
        nic_mac = nic_dict[constants.INIC_MAC]
11428
        if nic_mac is None:
11429
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11430
                                     errors.ECODE_INVAL)
11431
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11432
          # otherwise generate the mac
11433
          nic_dict[constants.INIC_MAC] = \
11434
            self.cfg.GenerateMAC(self.proc.GetECId())
11435
        else:
11436
          # or validate/reserve the current one
11437
          try:
11438
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11439
          except errors.ReservationError:
11440
            raise errors.OpPrereqError("MAC address %s already in use"
11441
                                       " in cluster" % nic_mac,
11442
                                       errors.ECODE_NOTUNIQUE)
11443

    
11444
    # DISK processing
11445
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11446
      raise errors.OpPrereqError("Disk operations not supported for"
11447
                                 " diskless instances",
11448
                                 errors.ECODE_INVAL)
11449
    for disk_op, _ in self.op.disks:
11450
      if disk_op == constants.DDM_REMOVE:
11451
        if len(instance.disks) == 1:
11452
          raise errors.OpPrereqError("Cannot remove the last disk of"
11453
                                     " an instance", errors.ECODE_INVAL)
11454
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
11455
                            msg="cannot remove disks")
11456

    
11457
      if (disk_op == constants.DDM_ADD and
11458
          len(instance.disks) >= constants.MAX_DISKS):
11459
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11460
                                   " add more" % constants.MAX_DISKS,
11461
                                   errors.ECODE_STATE)
11462
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11463
        # an existing disk
11464
        if disk_op < 0 or disk_op >= len(instance.disks):
11465
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11466
                                     " are 0 to %d" %
11467
                                     (disk_op, len(instance.disks) - 1),
11468
                                     errors.ECODE_INVAL)
11469

    
11470
    # disabling the instance
11471
    if self.op.offline_inst:
11472
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11473
                          msg="cannot change instance state to offline")
11474

    
11475
    # enabling the instance
11476
    if self.op.online_inst:
11477
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11478
                          msg="cannot make instance go online")
11479

    
11480
  def _ConvertPlainToDrbd(self, feedback_fn):
11481
    """Converts an instance from plain to drbd.
11482

11483
    """
11484
    feedback_fn("Converting template to drbd")
11485
    instance = self.instance
11486
    pnode = instance.primary_node
11487
    snode = self.op.remote_node
11488

    
11489
    assert instance.disk_template == constants.DT_PLAIN
11490

    
11491
    # create a fake disk info for _GenerateDiskTemplate
11492
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11493
                  constants.IDISK_VG: d.logical_id[0]}
11494
                 for d in instance.disks]
11495
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11496
                                      instance.name, pnode, [snode],
11497
                                      disk_info, None, None, 0, feedback_fn)
11498
    info = _GetInstanceInfoText(instance)
11499
    feedback_fn("Creating aditional volumes...")
11500
    # first, create the missing data and meta devices
11501
    for disk in new_disks:
11502
      # unfortunately this is... not too nice
11503
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11504
                            info, True)
11505
      for child in disk.children:
11506
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11507
    # at this stage, all new LVs have been created, we can rename the
11508
    # old ones
11509
    feedback_fn("Renaming original volumes...")
11510
    rename_list = [(o, n.children[0].logical_id)
11511
                   for (o, n) in zip(instance.disks, new_disks)]
11512
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11513
    result.Raise("Failed to rename original LVs")
11514

    
11515
    feedback_fn("Initializing DRBD devices...")
11516
    # all child devices are in place, we can now create the DRBD devices
11517
    for disk in new_disks:
11518
      for node in [pnode, snode]:
11519
        f_create = node == pnode
11520
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11521

    
11522
    # at this point, the instance has been modified
11523
    instance.disk_template = constants.DT_DRBD8
11524
    instance.disks = new_disks
11525
    self.cfg.Update(instance, feedback_fn)
11526

    
11527
    # Release node locks while waiting for sync
11528
    _ReleaseLocks(self, locking.LEVEL_NODE)
11529

    
11530
    # disks are created, waiting for sync
11531
    disk_abort = not _WaitForSync(self, instance,
11532
                                  oneshot=not self.op.wait_for_sync)
11533
    if disk_abort:
11534
      raise errors.OpExecError("There are some degraded disks for"
11535
                               " this instance, please cleanup manually")
11536

    
11537
    # Node resource locks will be released by caller
11538

    
11539
  def _ConvertDrbdToPlain(self, feedback_fn):
11540
    """Converts an instance from drbd to plain.
11541

11542
    """
11543
    instance = self.instance
11544

    
11545
    assert len(instance.secondary_nodes) == 1
11546
    assert instance.disk_template == constants.DT_DRBD8
11547

    
11548
    pnode = instance.primary_node
11549
    snode = instance.secondary_nodes[0]
11550
    feedback_fn("Converting template to plain")
11551

    
11552
    old_disks = instance.disks
11553
    new_disks = [d.children[0] for d in old_disks]
11554

    
11555
    # copy over size and mode
11556
    for parent, child in zip(old_disks, new_disks):
11557
      child.size = parent.size
11558
      child.mode = parent.mode
11559

    
11560
    # update instance structure
11561
    instance.disks = new_disks
11562
    instance.disk_template = constants.DT_PLAIN
11563
    self.cfg.Update(instance, feedback_fn)
11564

    
11565
    # Release locks in case removing disks takes a while
11566
    _ReleaseLocks(self, locking.LEVEL_NODE)
11567

    
11568
    feedback_fn("Removing volumes on the secondary node...")
11569
    for disk in old_disks:
11570
      self.cfg.SetDiskID(disk, snode)
11571
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11572
      if msg:
11573
        self.LogWarning("Could not remove block device %s on node %s,"
11574
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11575

    
11576
    feedback_fn("Removing unneeded volumes on the primary node...")
11577
    for idx, disk in enumerate(old_disks):
11578
      meta = disk.children[1]
11579
      self.cfg.SetDiskID(meta, pnode)
11580
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11581
      if msg:
11582
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11583
                        " continuing anyway: %s", idx, pnode, msg)
11584

    
11585
    # this is a DRBD disk, return its port to the pool
11586
    for disk in old_disks:
11587
      tcp_port = disk.logical_id[2]
11588
      self.cfg.AddTcpUdpPort(tcp_port)
11589

    
11590
    # Node resource locks will be released by caller
11591

    
11592
  def Exec(self, feedback_fn):
11593
    """Modifies an instance.
11594

11595
    All parameters take effect only at the next restart of the instance.
11596

11597
    """
11598
    # Process here the warnings from CheckPrereq, as we don't have a
11599
    # feedback_fn there.
11600
    for warn in self.warn:
11601
      feedback_fn("WARNING: %s" % warn)
11602

    
11603
    assert ((self.op.disk_template is None) ^
11604
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11605
      "Not owning any node resource locks"
11606

    
11607
    result = []
11608
    instance = self.instance
11609
    # disk changes
11610
    for disk_op, disk_dict in self.op.disks:
11611
      if disk_op == constants.DDM_REMOVE:
11612
        # remove the last disk
11613
        device = instance.disks.pop()
11614
        device_idx = len(instance.disks)
11615
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11616
          self.cfg.SetDiskID(disk, node)
11617
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11618
          if msg:
11619
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11620
                            " continuing anyway", device_idx, node, msg)
11621
        result.append(("disk/%d" % device_idx, "remove"))
11622

    
11623
        # if this is a DRBD disk, return its port to the pool
11624
        if device.dev_type in constants.LDS_DRBD:
11625
          tcp_port = device.logical_id[2]
11626
          self.cfg.AddTcpUdpPort(tcp_port)
11627
      elif disk_op == constants.DDM_ADD:
11628
        # add a new disk
11629
        if instance.disk_template in (constants.DT_FILE,
11630
                                        constants.DT_SHARED_FILE):
11631
          file_driver, file_path = instance.disks[0].logical_id
11632
          file_path = os.path.dirname(file_path)
11633
        else:
11634
          file_driver = file_path = None
11635
        disk_idx_base = len(instance.disks)
11636
        new_disk = _GenerateDiskTemplate(self,
11637
                                         instance.disk_template,
11638
                                         instance.name, instance.primary_node,
11639
                                         instance.secondary_nodes,
11640
                                         [disk_dict],
11641
                                         file_path,
11642
                                         file_driver,
11643
                                         disk_idx_base, feedback_fn)[0]
11644
        instance.disks.append(new_disk)
11645
        info = _GetInstanceInfoText(instance)
11646

    
11647
        logging.info("Creating volume %s for instance %s",
11648
                     new_disk.iv_name, instance.name)
11649
        # Note: this needs to be kept in sync with _CreateDisks
11650
        #HARDCODE
11651
        for node in instance.all_nodes:
11652
          f_create = node == instance.primary_node
11653
          try:
11654
            _CreateBlockDev(self, node, instance, new_disk,
11655
                            f_create, info, f_create)
11656
          except errors.OpExecError, err:
11657
            self.LogWarning("Failed to create volume %s (%s) on"
11658
                            " node %s: %s",
11659
                            new_disk.iv_name, new_disk, node, err)
11660
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11661
                       (new_disk.size, new_disk.mode)))
11662
      else:
11663
        # change a given disk
11664
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11665
        result.append(("disk.mode/%d" % disk_op,
11666
                       disk_dict[constants.IDISK_MODE]))
11667

    
11668
    if self.op.disk_template:
11669
      if __debug__:
11670
        check_nodes = set(instance.all_nodes)
11671
        if self.op.remote_node:
11672
          check_nodes.add(self.op.remote_node)
11673
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11674
          owned = self.owned_locks(level)
11675
          assert not (check_nodes - owned), \
11676
            ("Not owning the correct locks, owning %r, expected at least %r" %
11677
             (owned, check_nodes))
11678

    
11679
      r_shut = _ShutdownInstanceDisks(self, instance)
11680
      if not r_shut:
11681
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11682
                                 " proceed with disk template conversion")
11683
      mode = (instance.disk_template, self.op.disk_template)
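      # Dispatch to the conversion helper registered in _DISK_CONVERSIONS for
      # this (old, new) template pair; on failure the DRBD minors reserved for
      # the new disks are released again before re-raising.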
11684
      try:
11685
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11686
      except:
11687
        self.cfg.ReleaseDRBDMinors(instance.name)
11688
        raise
11689
      result.append(("disk_template", self.op.disk_template))
11690

    
11691
      assert instance.disk_template == self.op.disk_template, \
11692
        ("Expected disk template '%s', found '%s'" %
11693
         (self.op.disk_template, instance.disk_template))
11694

    
11695
    # Release node and resource locks if there are any (they might already have
11696
    # been released during disk conversion)
11697
    _ReleaseLocks(self, locking.LEVEL_NODE)
11698
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11699

    
11700
    # NIC changes
11701
    for nic_op, nic_dict in self.op.nics:
11702
      if nic_op == constants.DDM_REMOVE:
11703
        # remove the last nic
11704
        del instance.nics[-1]
11705
        result.append(("nic.%d" % len(instance.nics), "remove"))
11706
      elif nic_op == constants.DDM_ADD:
11707
        # mac and bridge should be set by now
11708
        mac = nic_dict[constants.INIC_MAC]
11709
        ip = nic_dict.get(constants.INIC_IP, None)
11710
        nicparams = self.nic_pinst[constants.DDM_ADD]
11711
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11712
        instance.nics.append(new_nic)
11713
        result.append(("nic.%d" % (len(instance.nics) - 1),
11714
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11715
                       (new_nic.mac, new_nic.ip,
11716
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11717
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11718
                       )))
11719
      else:
11720
        for key in (constants.INIC_MAC, constants.INIC_IP):
11721
          if key in nic_dict:
11722
            setattr(instance.nics[nic_op], key, nic_dict[key])
11723
        if nic_op in self.nic_pinst:
11724
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11725
        for key, val in nic_dict.iteritems():
11726
          result.append(("nic.%s/%d" % (key, nic_op), val))
11727

    
11728
    # hvparams changes
11729
    if self.op.hvparams:
11730
      instance.hvparams = self.hv_inst
11731
      for key, val in self.op.hvparams.iteritems():
11732
        result.append(("hv/%s" % key, val))
11733

    
11734
    # beparams changes
11735
    if self.op.beparams:
11736
      instance.beparams = self.be_inst
11737
      for key, val in self.op.beparams.iteritems():
11738
        result.append(("be/%s" % key, val))
11739

    
11740
    # OS change
11741
    if self.op.os_name:
11742
      instance.os = self.op.os_name
11743

    
11744
    # osparams changes
11745
    if self.op.osparams:
11746
      instance.osparams = self.os_inst
11747
      for key, val in self.op.osparams.iteritems():
11748
        result.append(("os/%s" % key, val))
11749

    
11750
    # online/offline instance
11751
    if self.op.online_inst:
11752
      self.cfg.MarkInstanceDown(instance.name)
11753
      result.append(("admin_state", constants.ADMINST_DOWN))
11754
    if self.op.offline_inst:
11755
      self.cfg.MarkInstanceOffline(instance.name)
11756
      result.append(("admin_state", constants.ADMINST_OFFLINE))
11757

    
11758
    self.cfg.Update(instance, feedback_fn)
11759

    
11760
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11761
                self.owned_locks(locking.LEVEL_NODE)), \
11762
      "All node locks should have been released by now"
11763

    
11764
    return result
11765

    
11766
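  # Map of supported (old template, new template) pairs to their conversion
  # helpers; only plain<->drbd conversions are implemented at the moment.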
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
11773
  HPATH = "instance-change-group"
11774
  HTYPE = constants.HTYPE_INSTANCE
11775
  REQ_BGL = False
11776

    
11777
  def ExpandNames(self):
11778
    self.share_locks = _ShareAll()
11779
    self.needed_locks = {
11780
      locking.LEVEL_NODEGROUP: [],
11781
      locking.LEVEL_NODE: [],
11782
      }
11783

    
11784
    self._ExpandAndLockInstance()
11785

    
11786
    if self.op.target_groups:
11787
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11788
                                  self.op.target_groups)
11789
    else:
11790
      self.req_target_uuids = None
11791

    
11792
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11793

    
11794
  def DeclareLocks(self, level):
11795
    if level == locking.LEVEL_NODEGROUP:
11796
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11797

    
11798
      if self.req_target_uuids:
11799
        lock_groups = set(self.req_target_uuids)
11800

    
11801
        # Lock all groups used by instance optimistically; this requires going
11802
        # via the node before it's locked, requiring verification later on
11803
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11804
        lock_groups.update(instance_groups)
11805
      else:
11806
        # No target groups, need to lock all of them
11807
        lock_groups = locking.ALL_SET
11808

    
11809
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11810

    
11811
    elif level == locking.LEVEL_NODE:
11812
      if self.req_target_uuids:
11813
        # Lock all nodes used by instances
11814
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11815
        self._LockInstancesNodes()
11816

    
11817
        # Lock all nodes in all potential target groups
11818
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11819
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11820
        member_nodes = [node_name
11821
                        for group in lock_groups
11822
                        for node_name in self.cfg.GetNodeGroup(group).members]
11823
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11824
      else:
11825
        # Lock all nodes as all groups are potential targets
11826
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11827

    
11828
  def CheckPrereq(self):
11829
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11830
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11831
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11832

    
11833
    assert (self.req_target_uuids is None or
11834
            owned_groups.issuperset(self.req_target_uuids))
11835
    assert owned_instances == set([self.op.instance_name])
11836

    
11837
    # Get instance information
11838
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11839

    
11840
    # Check if node groups for locked instance are still correct
11841
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11842
      ("Instance %s's nodes changed while we kept the lock" %
11843
       self.op.instance_name)
11844

    
11845
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11846
                                           owned_groups)
11847

    
11848
    if self.req_target_uuids:
11849
      # User requested specific target groups
11850
      self.target_uuids = self.req_target_uuids
11851
    else:
11852
      # All groups except those used by the instance are potential targets
11853
      self.target_uuids = owned_groups - inst_groups
11854

    
11855
    conflicting_groups = self.target_uuids & inst_groups
11856
    if conflicting_groups:
11857
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11858
                                 " used by the instance '%s'" %
11859
                                 (utils.CommaJoin(conflicting_groups),
11860
                                  self.op.instance_name),
11861
                                 errors.ECODE_INVAL)
11862

    
11863
    if not self.target_uuids:
11864
      raise errors.OpPrereqError("There are no possible target groups",
11865
                                 errors.ECODE_INVAL)
11866

    
11867
  def BuildHooksEnv(self):
11868
    """Build hooks env.
11869

11870
    """
11871
    assert self.target_uuids
11872

    
11873
    env = {
11874
      "TARGET_GROUPS": " ".join(self.target_uuids),
11875
      }
11876

    
11877
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11878

    
11879
    return env
11880

    
11881
  def BuildHooksNodes(self):
11882
    """Build hooks nodes.
11883

11884
    """
11885
    mn = self.cfg.GetMasterNode()
11886
    return ([mn], [mn])
11887

    
11888
  def Exec(self, feedback_fn):
11889
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11890

    
11891
    assert instances == [self.op.instance_name], "Instance not locked"
11892

    
11893
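    # Ask the iallocator (in "change group" mode) for the moves needed to get
    # the instance into one of the target groups; its result is turned into
    # regular jobs further down and returned via ResultWithJobs.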
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11894
                     instances=instances, target_groups=list(self.target_uuids))
11895

    
11896
    ial.Run(self.op.iallocator)
11897

    
11898
    if not ial.success:
11899
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11900
                                 " instance '%s' using iallocator '%s': %s" %
11901
                                 (self.op.instance_name, self.op.iallocator,
11902
                                  ial.info),
11903
                                 errors.ECODE_NORES)
11904

    
11905
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11906

    
11907
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11908
                 " instance '%s'", len(jobs), self.op.instance_name)
11909

    
11910
    return ResultWithJobs(jobs)
11911

    
11912

    
11913
class LUBackupQuery(NoHooksLU):
11914
  """Query the exports list
11915

11916
  """
11917
  REQ_BGL = False
11918

    
11919
  def ExpandNames(self):
11920
    self.needed_locks = {}
11921
    self.share_locks[locking.LEVEL_NODE] = 1
11922
    if not self.op.nodes:
11923
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11924
    else:
11925
      self.needed_locks[locking.LEVEL_NODE] = \
11926
        _GetWantedNodes(self, self.op.nodes)
11927

    
11928
  def Exec(self, feedback_fn):
11929
    """Compute the list of all the exported system images.
11930

11931
    @rtype: dict
11932
    @return: a dictionary with the structure node->(export-list)
11933
        where export-list is a list of the instances exported on
11934
        that node.
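        If querying a node failed, C{False} is returned for it instead of
        the export list.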
11935

11936
    """
11937
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11938
    rpcresult = self.rpc.call_export_list(self.nodes)
11939
    result = {}
11940
    for node in rpcresult:
11941
      if rpcresult[node].fail_msg:
11942
        result[node] = False
11943
      else:
11944
        result[node] = rpcresult[node].payload
11945

    
11946
    return result
11947

    
11948

    
11949
class LUBackupPrepare(NoHooksLU):
11950
  """Prepares an instance for an export and returns useful information.
11951

11952
  """
11953
  REQ_BGL = False
11954

    
11955
  def ExpandNames(self):
11956
    self._ExpandAndLockInstance()
11957

    
11958
  def CheckPrereq(self):
11959
    """Check prerequisites.
11960

11961
    """
11962
    instance_name = self.op.instance_name
11963

    
11964
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11965
    assert self.instance is not None, \
11966
          "Cannot retrieve locked instance %s" % self.op.instance_name
11967
    _CheckNodeOnline(self, self.instance.primary_node)
11968

    
11969
    self._cds = _GetClusterDomainSecret()
11970

    
11971
  def Exec(self, feedback_fn):
11972
    """Prepares an instance for an export.
11973

11974
    """
11975
    instance = self.instance
11976

    
11977
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11978
      salt = utils.GenerateSecret(8)
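      # The salt is used below when computing the HMAC of the X509 key name
      # with the cluster domain secret, and is returned to the caller as part
      # of the result.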
11979

    
11980
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11981
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11982
                                              constants.RIE_CERT_VALIDITY)
11983
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11984

    
11985
      (name, cert_pem) = result.payload
11986

    
11987
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11988
                                             cert_pem)
11989

    
11990
      return {
11991
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11992
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11993
                          salt),
11994
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11995
        }
11996

    
11997
    return None
11998

    
11999

    
12000
class LUBackupExport(LogicalUnit):
12001
  """Export an instance to an image in the cluster.
12002

12003
  """
12004
  HPATH = "instance-export"
12005
  HTYPE = constants.HTYPE_INSTANCE
12006
  REQ_BGL = False
12007

    
12008
  def CheckArguments(self):
12009
    """Check the arguments.
12010

12011
    """
12012
    self.x509_key_name = self.op.x509_key_name
12013
    self.dest_x509_ca_pem = self.op.destination_x509_ca
12014

    
12015
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
12016
      if not self.x509_key_name:
12017
        raise errors.OpPrereqError("Missing X509 key name for encryption",
12018
                                   errors.ECODE_INVAL)
12019

    
12020
      if not self.dest_x509_ca_pem:
12021
        raise errors.OpPrereqError("Missing destination X509 CA",
12022
                                   errors.ECODE_INVAL)
12023

    
12024
  def ExpandNames(self):
12025
    self._ExpandAndLockInstance()
12026

    
12027
    # Lock all nodes for local exports
12028
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12029
      # FIXME: lock only instance primary and destination node
12030
      #
12031
      # Sad but true, for now we have to lock all nodes, as we don't know where
12032
      # the previous export might be, and in this LU we search for it and
12033
      # remove it from its current node. In the future we could fix this by:
12034
      #  - making a tasklet to search (share-lock all), then create the
12035
      #    new one, then one to remove, after
12036
      #  - removing the removal operation altogether
12037
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12038

    
12039
  def DeclareLocks(self, level):
12040
    """Last minute lock declaration."""
12041
    # All nodes are locked anyway, so nothing to do here.
12042

    
12043
  def BuildHooksEnv(self):
12044
    """Build hooks env.
12045

12046
    This will run on the master, primary node and target node.
12047

12048
    """
12049
    env = {
12050
      "EXPORT_MODE": self.op.mode,
12051
      "EXPORT_NODE": self.op.target_node,
12052
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
12053
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
12054
      # TODO: Generic function for boolean env variables
12055
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
12056
      }
12057

    
12058
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
12059

    
12060
    return env
12061
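  # For illustration only (hypothetical values): for a local export the
  # dictionary built above comes out roughly as
  #   {"EXPORT_MODE": "local", "EXPORT_NODE": "node3.example.com",
  #    "EXPORT_DO_SHUTDOWN": True, "SHUTDOWN_TIMEOUT": 120,
  #    "REMOVE_INSTANCE": "False", ...}
  # plus the per-instance variables added by _BuildInstanceHookEnvByObject.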

    
12062
  def BuildHooksNodes(self):
12063
    """Build hooks nodes.
12064

12065
    """
12066
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
12067

    
12068
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12069
      nl.append(self.op.target_node)
12070

    
12071
    return (nl, nl)
12072

    
12073
  def CheckPrereq(self):
12074
    """Check prerequisites.
12075

12076
    This checks that the instance and node names are valid.
12077

12078
    """
12079
    instance_name = self.op.instance_name
12080

    
12081
    self.instance = self.cfg.GetInstanceInfo(instance_name)
12082
    assert self.instance is not None, \
12083
          "Cannot retrieve locked instance %s" % self.op.instance_name
12084
    _CheckNodeOnline(self, self.instance.primary_node)
12085

    
12086
    if (self.op.remove_instance and
12087
        self.instance.admin_state == constants.ADMINST_UP and
12088
        not self.op.shutdown):
12089
      raise errors.OpPrereqError("Can not remove instance without shutting it"
12090
                                 " down before")
12091

    
12092
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12093
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
12094
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
12095
      assert self.dst_node is not None
12096

    
12097
      _CheckNodeOnline(self, self.dst_node.name)
12098
      _CheckNodeNotDrained(self, self.dst_node.name)
12099

    
12100
      self._cds = None
12101
      self.dest_disk_info = None
12102
      self.dest_x509_ca = None
12103

    
12104
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12105
      self.dst_node = None
12106

    
12107
      if len(self.op.target_node) != len(self.instance.disks):
12108
        raise errors.OpPrereqError(("Received destination information for %s"
12109
                                    " disks, but instance %s has %s disks") %
12110
                                   (len(self.op.target_node), instance_name,
12111
                                    len(self.instance.disks)),
12112
                                   errors.ECODE_INVAL)
12113

    
12114
      cds = _GetClusterDomainSecret()
12115

    
12116
      # Check X509 key name
12117
      try:
12118
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
12119
      except (TypeError, ValueError), err:
12120
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
12121

    
12122
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
12123
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
12124
                                   errors.ECODE_INVAL)
12125

    
12126
      # Load and verify CA
12127
      try:
12128
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12129
      except OpenSSL.crypto.Error, err:
12130
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12131
                                   (err, ), errors.ECODE_INVAL)
12132

    
12133
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12134
      if errcode is not None:
12135
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12136
                                   (msg, ), errors.ECODE_INVAL)
12137

    
12138
      self.dest_x509_ca = cert
12139

    
12140
      # Verify target information
12141
      disk_info = []
12142
      for idx, disk_data in enumerate(self.op.target_node):
12143
        try:
12144
          (host, port, magic) = \
12145
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12146
        except errors.GenericError, err:
12147
          raise errors.OpPrereqError("Target info for disk %s: %s" %
12148
                                     (idx, err), errors.ECODE_INVAL)
12149

    
12150
        disk_info.append((host, port, magic))
12151

    
12152
      assert len(disk_info) == len(self.op.target_node)
12153
      self.dest_disk_info = disk_info
12154

    
12155
    else:
12156
      raise errors.ProgrammerError("Unhandled export mode %r" %
12157
                                   self.op.mode)
12158

    
12159
    # instance disk type verification
12160
    # TODO: Implement export support for file-based disks
12161
    for disk in self.instance.disks:
12162
      if disk.dev_type == constants.LD_FILE:
12163
        raise errors.OpPrereqError("Export not supported for instances with"
12164
                                   " file-based disks", errors.ECODE_INVAL)
12165

    
12166
  def _CleanupExports(self, feedback_fn):
12167
    """Removes exports of current instance from all other nodes.
12168

12169
    If an instance in a cluster with nodes A..D was exported to node C, its
12170
    exports will be removed from the nodes A, B and D.
12171

12172
    """
12173
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
12174

    
12175
    nodelist = self.cfg.GetNodeList()
12176
    nodelist.remove(self.dst_node.name)
12177

    
12178
    # On one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed, because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
12181
    iname = self.instance.name
12182
    if nodelist:
12183
      feedback_fn("Removing old exports for instance %s" % iname)
12184
      exportlist = self.rpc.call_export_list(nodelist)
12185
      for node in exportlist:
12186
        if exportlist[node].fail_msg:
12187
          continue
12188
        if iname in exportlist[node].payload:
12189
          msg = self.rpc.call_export_remove(node, iname).fail_msg
12190
          if msg:
12191
            self.LogWarning("Could not remove older export for instance %s"
12192
                            " on node %s: %s", iname, node, msg)
12193

    
12194
  def Exec(self, feedback_fn):
12195
    """Export an instance to an image in the cluster.
12196

12197
    """
12198
    assert self.op.mode in constants.EXPORT_MODES
12199

    
12200
    instance = self.instance
12201
    src_node = instance.primary_node
12202

    
12203
    if self.op.shutdown:
12204
      # shutdown the instance, but not the disks
12205
      feedback_fn("Shutting down instance %s" % instance.name)
12206
      result = self.rpc.call_instance_shutdown(src_node, instance,
12207
                                               self.op.shutdown_timeout)
12208
      # TODO: Maybe ignore failures if ignore_remove_failures is set
12209
      result.Raise("Could not shutdown instance %s on"
12210
                   " node %s" % (instance.name, src_node))
12211

    
12212
    # set the disks ID correctly since call_instance_start needs the
12213
    # correct drbd minor to create the symlinks
12214
    for disk in instance.disks:
12215
      self.cfg.SetDiskID(disk, src_node)
12216

    
12217
    activate_disks = (instance.admin_state != constants.ADMINST_UP)
12218

    
12219
    if activate_disks:
12220
      # Activate the instance disks if we'exporting a stopped instance
12221
      feedback_fn("Activating disks for %s" % instance.name)
12222
      _StartInstanceDisks(self, instance, None)
12223

    
12224
    try:
12225
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
12226
                                                     instance)
12227

    
12228
      helper.CreateSnapshots()
12229
      try:
12230
        if (self.op.shutdown and
12231
            instance.admin_state == constants.ADMINST_UP and
12232
            not self.op.remove_instance):
12233
          assert not activate_disks
12234
          feedback_fn("Starting instance %s" % instance.name)
12235
          result = self.rpc.call_instance_start(src_node,
12236
                                                (instance, None, None), False)
12237
          msg = result.fail_msg
12238
          if msg:
12239
            feedback_fn("Failed to start instance: %s" % msg)
12240
            _ShutdownInstanceDisks(self, instance)
12241
            raise errors.OpExecError("Could not start instance: %s" % msg)
12242

    
12243
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
12244
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
12245
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
12246
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
12247
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
12248

    
12249
          (key_name, _, _) = self.x509_key_name
12250

    
12251
          dest_ca_pem = \
12252
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
12253
                                            self.dest_x509_ca)
12254

    
12255
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
12256
                                                     key_name, dest_ca_pem,
12257
                                                     timeouts)
12258
      finally:
12259
        helper.Cleanup()
12260

    
12261
      # Check for backwards compatibility
12262
      assert len(dresults) == len(instance.disks)
12263
      assert compat.all(isinstance(i, bool) for i in dresults), \
12264
             "Not all results are boolean: %r" % dresults
12265

    
12266
    finally:
12267
      if activate_disks:
12268
        feedback_fn("Deactivating disks for %s" % instance.name)
12269
        _ShutdownInstanceDisks(self, instance)
12270

    
12271
    if not (compat.all(dresults) and fin_resu):
12272
      failures = []
12273
      if not fin_resu:
12274
        failures.append("export finalization")
12275
      if not compat.all(dresults):
12276
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
12277
                               if not dsk)
12278
        failures.append("disk export: disk(s) %s" % fdsk)
12279

    
12280
      raise errors.OpExecError("Export failed, errors in %s" %
12281
                               utils.CommaJoin(failures))
12282

    
12283
    # At this point, the export was successful, we can cleanup/finish
12284

    
12285
    # Remove instance if requested
12286
    if self.op.remove_instance:
12287
      feedback_fn("Removing instance %s" % instance.name)
12288
      _RemoveInstance(self, feedback_fn, instance,
12289
                      self.op.ignore_remove_failures)
12290

    
12291
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
12292
      self._CleanupExports(feedback_fn)
12293

    
12294
    return fin_resu, dresults
12295
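  # Reader's note (illustration only): Exec() returns a (fin_resu, dresults)
  # pair, where fin_resu is the overall finalization status and dresults holds
  # one boolean per instance disk, e.g. (True, [True, True]) for a fully
  # successful two-disk export; partial failures raise OpExecError above
  # instead of being returned.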

    
12296

    
12297
class LUBackupRemove(NoHooksLU):
12298
  """Remove exports related to the named instance.
12299

12300
  """
12301
  REQ_BGL = False
12302

    
12303
  def ExpandNames(self):
12304
    self.needed_locks = {}
12305
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can also remove exports for an already-removed instance)
12308
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
12309

    
12310
  def Exec(self, feedback_fn):
12311
    """Remove any export.
12312

12313
    """
12314
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
12315
    # If the instance was not found we'll try with the name that was passed in.
12316
    # This will only work if it was an FQDN, though.
12317
    fqdn_warn = False
12318
    if not instance_name:
12319
      fqdn_warn = True
12320
      instance_name = self.op.instance_name
12321

    
12322
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
12323
    exportlist = self.rpc.call_export_list(locked_nodes)
12324
    found = False
12325
    for node in exportlist:
12326
      msg = exportlist[node].fail_msg
12327
      if msg:
12328
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
12329
        continue
12330
      if instance_name in exportlist[node].payload:
12331
        found = True
12332
        result = self.rpc.call_export_remove(node, instance_name)
12333
        msg = result.fail_msg
12334
        if msg:
12335
          logging.error("Could not remove export for instance %s"
12336
                        " on node %s: %s", instance_name, node, msg)
12337

    
12338
    if fqdn_warn and not found:
12339
      feedback_fn("Export not found. If trying to remove an export belonging"
12340
                  " to a deleted instance please use its Fully Qualified"
12341
                  " Domain Name.")
12342

    
12343

    
12344
class LUGroupAdd(LogicalUnit):
12345
  """Logical unit for creating node groups.
12346

12347
  """
12348
  HPATH = "group-add"
12349
  HTYPE = constants.HTYPE_GROUP
12350
  REQ_BGL = False
12351

    
12352
  def ExpandNames(self):
12353
    # We need the new group's UUID here so that we can create and acquire the
12354
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12355
    # that it should not check whether the UUID exists in the configuration.
12356
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12357
    self.needed_locks = {}
12358
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12359

    
12360
  def CheckPrereq(self):
12361
    """Check prerequisites.
12362

12363
    This checks that the given group name is not an existing node group
12364
    already.
12365

12366
    """
12367
    try:
12368
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12369
    except errors.OpPrereqError:
12370
      pass
12371
    else:
12372
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12373
                                 " node group (UUID: %s)" %
12374
                                 (self.op.group_name, existing_uuid),
12375
                                 errors.ECODE_EXISTS)
12376

    
12377
    if self.op.ndparams:
12378
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12379

    
12380
  def BuildHooksEnv(self):
12381
    """Build hooks env.
12382

12383
    """
12384
    return {
12385
      "GROUP_NAME": self.op.group_name,
12386
      }
12387

    
12388
  def BuildHooksNodes(self):
12389
    """Build hooks nodes.
12390

12391
    """
12392
    mn = self.cfg.GetMasterNode()
12393
    return ([mn], [mn])
12394

    
12395
  def Exec(self, feedback_fn):
12396
    """Add the node group to the cluster.
12397

12398
    """
12399
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12400
                                  uuid=self.group_uuid,
12401
                                  alloc_policy=self.op.alloc_policy,
12402
                                  ndparams=self.op.ndparams)
12403

    
12404
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12405
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12406

    
12407

    
12408
class LUGroupAssignNodes(NoHooksLU):
12409
  """Logical unit for assigning nodes to groups.
12410

12411
  """
12412
  REQ_BGL = False
12413

    
12414
  def ExpandNames(self):
12415
    # These raise errors.OpPrereqError on their own:
12416
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12417
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12418

    
12419
    # We want to lock all the affected nodes and groups. We have readily
12420
    # available the list of nodes, and the *destination* group. To gather the
12421
    # list of "source" groups, we need to fetch node information later on.
12422
    self.needed_locks = {
12423
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12424
      locking.LEVEL_NODE: self.op.nodes,
12425
      }
12426

    
12427
  def DeclareLocks(self, level):
12428
    if level == locking.LEVEL_NODEGROUP:
12429
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12430

    
12431
      # Try to get all affected nodes' groups without having the group or node
12432
      # lock yet. Needs verification later in the code flow.
12433
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12434

    
12435
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12436

    
12437
  def CheckPrereq(self):
12438
    """Check prerequisites.
12439

12440
    """
12441
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12442
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12443
            frozenset(self.op.nodes))
12444

    
12445
    expected_locks = (set([self.group_uuid]) |
12446
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12447
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12448
    if actual_locks != expected_locks:
12449
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12450
                               " current groups are '%s', used to be '%s'" %
12451
                               (utils.CommaJoin(expected_locks),
12452
                                utils.CommaJoin(actual_locks)))
12453

    
12454
    self.node_data = self.cfg.GetAllNodesInfo()
12455
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12456
    instance_data = self.cfg.GetAllInstancesInfo()
12457

    
12458
    if self.group is None:
12459
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12460
                               (self.op.group_name, self.group_uuid))
12461

    
12462
    (new_splits, previous_splits) = \
12463
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12464
                                             for node in self.op.nodes],
12465
                                            self.node_data, instance_data)
12466

    
12467
    if new_splits:
12468
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12469

    
12470
      if not self.op.force:
12471
        raise errors.OpExecError("The following instances get split by this"
12472
                                 " change and --force was not given: %s" %
12473
                                 fmt_new_splits)
12474
      else:
12475
        self.LogWarning("This operation will split the following instances: %s",
12476
                        fmt_new_splits)
12477

    
12478
        if previous_splits:
12479
          self.LogWarning("In addition, these already-split instances continue"
12480
                          " to be split across groups: %s",
12481
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12482

    
12483
  def Exec(self, feedback_fn):
12484
    """Assign nodes to a new group.
12485

12486
    """
12487
    for node in self.op.nodes:
12488
      self.node_data[node].group = self.group_uuid
12489

    
12490
    # FIXME: Depends on side-effects of modifying the result of
12491
    # C{cfg.GetAllNodesInfo}
12492

    
12493
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
12494

    
12495
  @staticmethod
12496
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12497
    """Check for split instances after a node assignment.
12498

12499
    This method considers a series of node assignments as an atomic operation,
12500
    and returns information about split instances after applying the set of
12501
    changes.
12502

12503
    In particular, it returns information about newly split instances, and
    about instances that were already split and remain so after the change.
12505

12506
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12507
    considered.
12508

12509
    @type changes: list of (node_name, new_group_uuid) pairs.
12510
    @param changes: list of node assignments to consider.
12511
    @param node_data: a dict with data for all nodes
12512
    @param instance_data: a dict with all instances to consider
12513
    @rtype: a two-tuple
12514
    @return: a list of instances that were previously whole and become split as
      a consequence of this change, and a list of instances that were already
      split and that this change does not fix.
12517

12518
    """
12519
    changed_nodes = dict((node, group) for node, group in changes
12520
                         if node_data[node].group != group)
12521

    
12522
    all_split_instances = set()
12523
    previously_split_instances = set()
12524

    
12525
    def InstanceNodes(instance):
12526
      return [instance.primary_node] + list(instance.secondary_nodes)
12527

    
12528
    for inst in instance_data.values():
12529
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12530
        continue
12531

    
12532
      instance_nodes = InstanceNodes(inst)
12533

    
12534
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12535
        previously_split_instances.add(inst.name)
12536

    
12537
      if len(set(changed_nodes.get(node, node_data[node].group)
12538
                 for node in instance_nodes)) > 1:
12539
        all_split_instances.add(inst.name)
12540

    
12541
    return (list(all_split_instances - previously_split_instances),
12542
            list(previously_split_instances & all_split_instances))
12543
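  # Worked example (hypothetical node and group names, for illustration only):
  # for a DRBD instance with nodes node1 and node2 that both live in group
  # "uuid-g1", the assignment [("node1", "uuid-g2")] makes the instance newly
  # split, so it appears in the first returned list; an instance that was
  # already split across two groups and is not reunited by the change appears
  # in the second list instead.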

    
12544

    
12545
class _GroupQuery(_QueryBase):
12546
  FIELDS = query.GROUP_FIELDS
12547

    
12548
  def ExpandNames(self, lu):
12549
    lu.needed_locks = {}
12550

    
12551
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12552
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12553

    
12554
    if not self.names:
12555
      self.wanted = [name_to_uuid[name]
12556
                     for name in utils.NiceSort(name_to_uuid.keys())]
12557
    else:
12558
      # Accept names to be either names or UUIDs.
12559
      missing = []
12560
      self.wanted = []
12561
      all_uuid = frozenset(self._all_groups.keys())
12562

    
12563
      for name in self.names:
12564
        if name in all_uuid:
12565
          self.wanted.append(name)
12566
        elif name in name_to_uuid:
12567
          self.wanted.append(name_to_uuid[name])
12568
        else:
12569
          missing.append(name)
12570

    
12571
      if missing:
12572
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12573
                                   utils.CommaJoin(missing),
12574
                                   errors.ECODE_NOENT)
12575

    
12576
  def DeclareLocks(self, lu, level):
12577
    pass
12578

    
12579
  def _GetQueryData(self, lu):
12580
    """Computes the list of node groups and their attributes.
12581

12582
    """
12583
    do_nodes = query.GQ_NODE in self.requested_data
12584
    do_instances = query.GQ_INST in self.requested_data
12585

    
12586
    group_to_nodes = None
12587
    group_to_instances = None
12588

    
12589
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12590
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12591
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12592
    # instance->node. Hence, we will need to process nodes even if we only need
12593
    # instance information.
12594
    if do_nodes or do_instances:
12595
      all_nodes = lu.cfg.GetAllNodesInfo()
12596
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12597
      node_to_group = {}
12598

    
12599
      for node in all_nodes.values():
12600
        if node.group in group_to_nodes:
12601
          group_to_nodes[node.group].append(node.name)
12602
          node_to_group[node.name] = node.group
12603

    
12604
      if do_instances:
12605
        all_instances = lu.cfg.GetAllInstancesInfo()
12606
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12607

    
12608
        for instance in all_instances.values():
12609
          node = instance.primary_node
12610
          if node in node_to_group:
12611
            group_to_instances[node_to_group[node]].append(instance.name)
12612

    
12613
        if not do_nodes:
12614
          # Do not pass on node information if it was not requested.
12615
          group_to_nodes = None
12616

    
12617
    return query.GroupQueryData([self._all_groups[uuid]
12618
                                 for uuid in self.wanted],
12619
                                group_to_nodes, group_to_instances)
12620
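  # Shape of the computed mappings (illustrative names only):
  #   group_to_nodes     == {"uuid-g1": ["node1", "node2"], "uuid-g2": ["node3"]}
  #   group_to_instances == {"uuid-g1": ["inst1"], "uuid-g2": []}
  # Either mapping is None when the corresponding data was not requested.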

    
12621

    
12622
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
12643
  """Modifies the parameters of a node group.
12644

12645
  """
12646
  HPATH = "group-modify"
12647
  HTYPE = constants.HTYPE_GROUP
12648
  REQ_BGL = False
12649

    
12650
  def CheckArguments(self):
12651
    all_changes = [
12652
      self.op.ndparams,
12653
      self.op.alloc_policy,
12654
      ]
12655

    
12656
    if all_changes.count(None) == len(all_changes):
12657
      raise errors.OpPrereqError("Please pass at least one modification",
12658
                                 errors.ECODE_INVAL)
12659

    
12660
  def ExpandNames(self):
12661
    # This raises errors.OpPrereqError on its own:
12662
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12663

    
12664
    self.needed_locks = {
12665
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12666
      }
12667

    
12668
  def CheckPrereq(self):
12669
    """Check prerequisites.
12670

12671
    """
12672
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12673

    
12674
    if self.group is None:
12675
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12676
                               (self.op.group_name, self.group_uuid))
12677

    
12678
    if self.op.ndparams:
12679
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12680
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12681
      self.new_ndparams = new_ndparams
12682

    
12683
  def BuildHooksEnv(self):
12684
    """Build hooks env.
12685

12686
    """
12687
    return {
12688
      "GROUP_NAME": self.op.group_name,
12689
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12690
      }
12691

    
12692
  def BuildHooksNodes(self):
12693
    """Build hooks nodes.
12694

12695
    """
12696
    mn = self.cfg.GetMasterNode()
12697
    return ([mn], [mn])
12698

    
12699
  def Exec(self, feedback_fn):
12700
    """Modifies the node group.
12701

12702
    """
12703
    result = []
12704

    
12705
    if self.op.ndparams:
12706
      self.group.ndparams = self.new_ndparams
12707
      result.append(("ndparams", str(self.group.ndparams)))
12708

    
12709
    if self.op.alloc_policy:
12710
      self.group.alloc_policy = self.op.alloc_policy
12711

    
12712
    self.cfg.Update(self.group, feedback_fn)
12713
    return result
12714

    
12715

    
12716
class LUGroupRemove(LogicalUnit):
12717
  HPATH = "group-remove"
12718
  HTYPE = constants.HTYPE_GROUP
12719
  REQ_BGL = False
12720

    
12721
  def ExpandNames(self):
12722
    # This will raise errors.OpPrereqError on its own:
12723
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12724
    self.needed_locks = {
12725
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12726
      }
12727

    
12728
  def CheckPrereq(self):
12729
    """Check prerequisites.
12730

12731
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12734

12735
    """
12736
    # Verify that the group is empty.
12737
    group_nodes = [node.name
12738
                   for node in self.cfg.GetAllNodesInfo().values()
12739
                   if node.group == self.group_uuid]
12740

    
12741
    if group_nodes:
12742
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12743
                                 " nodes: %s" %
12744
                                 (self.op.group_name,
12745
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12746
                                 errors.ECODE_STATE)
12747

    
12748
    # Verify the cluster would not be left group-less.
12749
    if len(self.cfg.GetNodeGroupList()) == 1:
12750
      raise errors.OpPrereqError("Group '%s' is the only group,"
12751
                                 " cannot be removed" %
12752
                                 self.op.group_name,
12753
                                 errors.ECODE_STATE)
12754

    
12755
  def BuildHooksEnv(self):
12756
    """Build hooks env.
12757

12758
    """
12759
    return {
12760
      "GROUP_NAME": self.op.group_name,
12761
      }
12762

    
12763
  def BuildHooksNodes(self):
12764
    """Build hooks nodes.
12765

12766
    """
12767
    mn = self.cfg.GetMasterNode()
12768
    return ([mn], [mn])
12769

    
12770
  def Exec(self, feedback_fn):
12771
    """Remove the node group.
12772

12773
    """
12774
    try:
12775
      self.cfg.RemoveNodeGroup(self.group_uuid)
12776
    except errors.ConfigurationError:
12777
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12778
                               (self.op.group_name, self.group_uuid))
12779

    
12780
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12781

    
12782

    
12783
class LUGroupRename(LogicalUnit):
12784
  HPATH = "group-rename"
12785
  HTYPE = constants.HTYPE_GROUP
12786
  REQ_BGL = False
12787

    
12788
  def ExpandNames(self):
12789
    # This raises errors.OpPrereqError on its own:
12790
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12791

    
12792
    self.needed_locks = {
12793
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12794
      }
12795

    
12796
  def CheckPrereq(self):
12797
    """Check prerequisites.
12798

12799
    Ensures requested new name is not yet used.
12800

12801
    """
12802
    try:
12803
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12804
    except errors.OpPrereqError:
12805
      pass
12806
    else:
12807
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12808
                                 " node group (UUID: %s)" %
12809
                                 (self.op.new_name, new_name_uuid),
12810
                                 errors.ECODE_EXISTS)
12811

    
12812
  def BuildHooksEnv(self):
12813
    """Build hooks env.
12814

12815
    """
12816
    return {
12817
      "OLD_NAME": self.op.group_name,
12818
      "NEW_NAME": self.op.new_name,
12819
      }
12820

    
12821
  def BuildHooksNodes(self):
12822
    """Build hooks nodes.
12823

12824
    """
12825
    mn = self.cfg.GetMasterNode()
12826

    
12827
    all_nodes = self.cfg.GetAllNodesInfo()
12828
    all_nodes.pop(mn, None)
12829

    
12830
    run_nodes = [mn]
12831
    run_nodes.extend(node.name for node in all_nodes.values()
12832
                     if node.group == self.group_uuid)
12833

    
12834
    return (run_nodes, run_nodes)
12835

    
12836
  def Exec(self, feedback_fn):
12837
    """Rename the node group.
12838

12839
    """
12840
    group = self.cfg.GetNodeGroup(self.group_uuid)
12841

    
12842
    if group is None:
12843
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12844
                               (self.op.group_name, self.group_uuid))
12845

    
12846
    group.name = self.op.new_name
12847
    self.cfg.Update(group, feedback_fn)
12848

    
12849
    return self.op.new_name
12850

    
12851

    
12852
class LUGroupEvacuate(LogicalUnit):
12853
  HPATH = "group-evacuate"
12854
  HTYPE = constants.HTYPE_GROUP
12855
  REQ_BGL = False
12856

    
12857
  def ExpandNames(self):
12858
    # This raises errors.OpPrereqError on its own:
12859
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12860

    
12861
    if self.op.target_groups:
12862
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12863
                                  self.op.target_groups)
12864
    else:
12865
      self.req_target_uuids = []
12866

    
12867
    if self.group_uuid in self.req_target_uuids:
12868
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12869
                                 " as a target group (targets are %s)" %
12870
                                 (self.group_uuid,
12871
                                  utils.CommaJoin(self.req_target_uuids)),
12872
                                 errors.ECODE_INVAL)
12873

    
12874
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12875

    
12876
    self.share_locks = _ShareAll()
12877
    self.needed_locks = {
12878
      locking.LEVEL_INSTANCE: [],
12879
      locking.LEVEL_NODEGROUP: [],
12880
      locking.LEVEL_NODE: [],
12881
      }
12882

    
12883
  def DeclareLocks(self, level):
12884
    if level == locking.LEVEL_INSTANCE:
12885
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12886

    
12887
      # Lock instances optimistically, needs verification once node and group
12888
      # locks have been acquired
12889
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12890
        self.cfg.GetNodeGroupInstances(self.group_uuid)
12891

    
12892
    elif level == locking.LEVEL_NODEGROUP:
12893
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12894

    
12895
      if self.req_target_uuids:
12896
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12897

    
12898
        # Lock all groups used by instances optimistically; this requires going
12899
        # via the node before it's locked, requiring verification later on
12900
        lock_groups.update(group_uuid
12901
                           for instance_name in
12902
                             self.owned_locks(locking.LEVEL_INSTANCE)
12903
                           for group_uuid in
12904
                             self.cfg.GetInstanceNodeGroups(instance_name))
12905
      else:
12906
        # No target groups, need to lock all of them
12907
        lock_groups = locking.ALL_SET
12908

    
12909
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12910

    
12911
    elif level == locking.LEVEL_NODE:
12912
      # This will only lock the nodes in the group to be evacuated which
12913
      # contain actual instances
12914
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12915
      self._LockInstancesNodes()
12916

    
12917
      # Lock all nodes in group to be evacuated and target groups
12918
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12919
      assert self.group_uuid in owned_groups
12920
      member_nodes = [node_name
12921
                      for group in owned_groups
12922
                      for node_name in self.cfg.GetNodeGroup(group).members]
12923
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12924

    
12925
  def CheckPrereq(self):
12926
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12927
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12928
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12929

    
12930
    assert owned_groups.issuperset(self.req_target_uuids)
12931
    assert self.group_uuid in owned_groups
12932

    
12933
    # Check if locked instances are still correct
12934
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12935

    
12936
    # Get instance information
12937
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12938

    
12939
    # Check if node groups for locked instances are still correct
12940
    for instance_name in owned_instances:
12941
      inst = self.instances[instance_name]
12942
      assert owned_nodes.issuperset(inst.all_nodes), \
12943
        "Instance %s's nodes changed while we kept the lock" % instance_name
12944

    
12945
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12946
                                             owned_groups)
12947

    
12948
      assert self.group_uuid in inst_groups, \
12949
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12950

    
12951
    if self.req_target_uuids:
12952
      # User requested specific target groups
12953
      self.target_uuids = self.req_target_uuids
12954
    else:
12955
      # All groups except the one to be evacuated are potential targets
12956
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12957
                           if group_uuid != self.group_uuid]
12958

    
12959
      if not self.target_uuids:
12960
        raise errors.OpPrereqError("There are no possible target groups",
12961
                                   errors.ECODE_INVAL)
12962

    
12963
  def BuildHooksEnv(self):
12964
    """Build hooks env.
12965

12966
    """
12967
    return {
12968
      "GROUP_NAME": self.op.group_name,
12969
      "TARGET_GROUPS": " ".join(self.target_uuids),
12970
      }
12971

    
12972
  def BuildHooksNodes(self):
12973
    """Build hooks nodes.
12974

12975
    """
12976
    mn = self.cfg.GetMasterNode()
12977

    
12978
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12979

    
12980
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12981

    
12982
    return (run_nodes, run_nodes)
12983

    
12984
  def Exec(self, feedback_fn):
12985
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12986

    
12987
    assert self.group_uuid not in self.target_uuids
12988

    
12989
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12990
                     instances=instances, target_groups=self.target_uuids)
12991

    
12992
    ial.Run(self.op.iallocator)
12993

    
12994
    if not ial.success:
12995
      raise errors.OpPrereqError("Can't compute group evacuation using"
12996
                                 " iallocator '%s': %s" %
12997
                                 (self.op.iallocator, ial.info),
12998
                                 errors.ECODE_NORES)
12999

    
13000
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
13001

    
13002
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
13003
                 len(jobs), self.op.group_name)
13004

    
13005
    return ResultWithJobs(jobs)
13006
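  # For illustration (an assumption about typical iallocator results, not
  # upstream code): "jobs" is a list of opcode lists, one sub-list per
  # affected instance, e.g.
  #   [[opcodes.OpInstanceMigrate(...)], [opcodes.OpInstanceReplaceDisks(...)]]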

    
13007

    
13008
class TagsLU(NoHooksLU): # pylint: disable=W0223
13009
  """Generic tags LU.
13010

13011
  This is an abstract class which is the parent of all the other tags LUs.
13012

13013
  """
13014
  def ExpandNames(self):
13015
    self.group_uuid = None
13016
    self.needed_locks = {}
13017
    if self.op.kind == constants.TAG_NODE:
13018
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
13019
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
13020
    elif self.op.kind == constants.TAG_INSTANCE:
13021
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
13022
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
13023
    elif self.op.kind == constants.TAG_NODEGROUP:
13024
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
13025

    
13026
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
13027
    # not possible to acquire the BGL based on opcode parameters)
13028

    
13029
  def CheckPrereq(self):
13030
    """Check prerequisites.
13031

13032
    """
13033
    if self.op.kind == constants.TAG_CLUSTER:
13034
      self.target = self.cfg.GetClusterInfo()
13035
    elif self.op.kind == constants.TAG_NODE:
13036
      self.target = self.cfg.GetNodeInfo(self.op.name)
13037
    elif self.op.kind == constants.TAG_INSTANCE:
13038
      self.target = self.cfg.GetInstanceInfo(self.op.name)
13039
    elif self.op.kind == constants.TAG_NODEGROUP:
13040
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
13041
    else:
13042
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
13043
                                 str(self.op.kind), errors.ECODE_INVAL)
13044

    
13045

    
13046
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
13066
  """Searches the tags for a given pattern.
13067

13068
  """
13069
  REQ_BGL = False
13070

    
13071
  def ExpandNames(self):
13072
    self.needed_locks = {}
13073

    
13074
  def CheckPrereq(self):
13075
    """Check prerequisites.
13076

13077
    This checks the pattern passed for validity by compiling it.
13078

13079
    """
13080
    try:
13081
      self.re = re.compile(self.op.pattern)
13082
    except re.error, err:
13083
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
13084
                                 (self.op.pattern, err), errors.ECODE_INVAL)
13085

    
13086
  def Exec(self, feedback_fn):
13087
    """Returns the tag list.
13088

13089
    """
13090
    cfg = self.cfg
13091
    tgts = [("/cluster", cfg.GetClusterInfo())]
13092
    ilist = cfg.GetAllInstancesInfo().values()
13093
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
13094
    nlist = cfg.GetAllNodesInfo().values()
13095
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
13096
    tgts.extend(("/nodegroup/%s" % n.name, n)
13097
                for n in cfg.GetAllNodeGroupsInfo().values())
13098
    results = []
13099
    for path, target in tgts:
13100
      for tag in target.GetTags():
13101
        if self.re.search(tag):
13102
          results.append((path, tag))
13103
    return results
13104
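  # Example result shape (hypothetical tags, for illustration): searching with
  # the pattern "^web" might return
  #   [("/cluster", "webfarm"), ("/instances/web1.example.com", "webserver")]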

    
13105

    
13106
class LUTagsSet(TagsLU):
13107
  """Sets a tag on a given object.
13108

13109
  """
13110
  REQ_BGL = False
13111

    
13112
  def CheckPrereq(self):
13113
    """Check prerequisites.
13114

13115
    This checks the type and length of the tag name and value.
13116

13117
    """
13118
    TagsLU.CheckPrereq(self)
13119
    for tag in self.op.tags:
13120
      objects.TaggableObject.ValidateTag(tag)
13121

    
13122
  def Exec(self, feedback_fn):
13123
    """Sets the tag.
13124

13125
    """
13126
    try:
13127
      for tag in self.op.tags:
13128
        self.target.AddTag(tag)
13129
    except errors.TagError, err:
13130
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
13131
    self.cfg.Update(self.target, feedback_fn)
13132

    
13133

    
13134
class LUTagsDel(TagsLU):
13135
  """Delete a list of tags from a given object.
13136

13137
  """
13138
  REQ_BGL = False
13139

    
13140
  def CheckPrereq(self):
13141
    """Check prerequisites.
13142

13143
    This checks that we have the given tag.
13144

13145
    """
13146
    TagsLU.CheckPrereq(self)
13147
    for tag in self.op.tags:
13148
      objects.TaggableObject.ValidateTag(tag)
13149
    del_tags = frozenset(self.op.tags)
13150
    cur_tags = self.target.GetTags()
13151

    
13152
    diff_tags = del_tags - cur_tags
13153
    if diff_tags:
13154
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13155
      raise errors.OpPrereqError("Tag(s) %s not found" %
13156
                                 (utils.CommaJoin(diff_names), ),
13157
                                 errors.ECODE_NOENT)
13158

    
13159
  def Exec(self, feedback_fn):
13160
    """Remove the tag from the object.
13161

13162
    """
13163
    for tag in self.op.tags:
13164
      self.target.RemoveTag(tag)
13165
    self.cfg.Update(self.target, feedback_fn)
13166

    
13167

    
13168
class LUTestDelay(NoHooksLU):
13169
  """Sleep for a specified amount of time.
13170

13171
  This LU sleeps on the master and/or nodes for a specified amount of
13172
  time.
13173

13174
  """
13175
  REQ_BGL = False
13176

    
13177
  def ExpandNames(self):
13178
    """Expand names and set required locks.
13179

13180
    This expands the node list, if any.
13181

13182
    """
13183
    self.needed_locks = {}
13184
    if self.op.on_nodes:
13185
      # _GetWantedNodes can be used here, but is not always appropriate to use
13186
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13187
      # more information.
13188
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13189
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13190

    
13191
  def _TestDelay(self):
13192
    """Do the actual sleep.
13193

13194
    """
13195
    if self.op.on_master:
13196
      if not utils.TestDelay(self.op.duration):
13197
        raise errors.OpExecError("Error during master delay test")
13198
    if self.op.on_nodes:
13199
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13200
      for node, node_result in result.items():
13201
        node_result.Raise("Failure during rpc call to node %s" % node)
13202

    
13203
  def Exec(self, feedback_fn):
13204
    """Execute the test delay opcode, with the wanted repetitions.
13205

13206
    """
13207
    if self.op.repeat == 0:
13208
      self._TestDelay()
13209
    else:
13210
      top_value = self.op.repeat - 1
13211
      for i in range(self.op.repeat):
13212
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13213
        self._TestDelay()
13214

    
13215

    
13216
class LUTestJqueue(NoHooksLU):
13217
  """Utility LU to test some aspects of the job queue.
13218

13219
  """
13220
  REQ_BGL = False
13221

    
13222
  # Must be lower than default timeout for WaitForJobChange to see whether it
13223
  # notices changed jobs
13224
  _CLIENT_CONNECT_TIMEOUT = 20.0
13225
  _CLIENT_CONFIRM_TIMEOUT = 60.0
13226

    
13227
  @classmethod
13228
  def _NotifyUsingSocket(cls, cb, errcls):
13229
    """Opens a Unix socket and waits for another program to connect.
13230

13231
    @type cb: callable
13232
    @param cb: Callback to send socket name to client
13233
    @type errcls: class
13234
    @param errcls: Exception class to use for errors
13235

13236
    """
13237
    # Using a temporary directory as there's no easy way to create temporary
13238
    # sockets without writing a custom loop around tempfile.mktemp and
13239
    # socket.bind
13240
    tmpdir = tempfile.mkdtemp()
13241
    try:
13242
      tmpsock = utils.PathJoin(tmpdir, "sock")
13243

    
13244
      logging.debug("Creating temporary socket at %s", tmpsock)
13245
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13246
      try:
13247
        sock.bind(tmpsock)
13248
        sock.listen(1)
13249

    
13250
        # Send details to client
13251
        cb(tmpsock)
13252

    
13253
        # Wait for client to connect before continuing
13254
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13255
        try:
13256
          (conn, _) = sock.accept()
13257
        except socket.error, err:
13258
          raise errcls("Client didn't connect in time (%s)" % err)
13259
      finally:
13260
        sock.close()
13261
    finally:
13262
      # Remove as soon as client is connected
13263
      shutil.rmtree(tmpdir)
13264

    
13265
    # Wait for client to close
13266
    try:
13267
      try:
13268
        # pylint: disable=E1101
13269
        # Instance of '_socketobject' has no ... member
13270
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13271
        conn.recv(1)
13272
      except socket.error, err:
13273
        raise errcls("Client failed to confirm notification (%s)" % err)
13274
    finally:
13275
      conn.close()
13276
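  # Client-side sketch (an assumption for illustration only): the test client
  # is expected to connect to the socket path passed through the callback and
  # keep the connection open until it wants to confirm the notification:
  #   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   s.connect(sockname)   # must happen within _CLIENT_CONNECT_TIMEOUT
  #   ...
  #   s.close()             # unblocks the conn.recv(1) call above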

    
13277
  def _SendNotification(self, test, arg, sockname):
13278
    """Sends a notification to the client.
13279

13280
    @type test: string
13281
    @param test: Test name
13282
    @param arg: Test argument (depends on test)
13283
    @type sockname: string
13284
    @param sockname: Socket path
13285

13286
    """
13287
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13288

    
13289
  def _Notify(self, prereq, test, arg):
13290
    """Notifies the client of a test.
13291

13292
    @type prereq: bool
13293
    @param prereq: Whether this is a prereq-phase test
13294
    @type test: string
13295
    @param test: Test name
13296
    @param arg: Test argument (depends on test)
13297

13298
    """
13299
    if prereq:
13300
      errcls = errors.OpPrereqError
13301
    else:
13302
      errcls = errors.OpExecError
13303

    
13304
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13305
                                                  test, arg),
13306
                                   errcls)
13307

    
13308
  def CheckArguments(self):
13309
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13310
    self.expandnames_calls = 0
13311

    
13312
  def ExpandNames(self):
13313
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13314
    if checkargs_calls < 1:
13315
      raise errors.ProgrammerError("CheckArguments was not called")
13316

    
13317
    self.expandnames_calls += 1
13318

    
13319
    if self.op.notify_waitlock:
13320
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13321

    
13322
    self.LogInfo("Expanding names")
13323

    
13324
    # Get lock on master node (just to get a lock, not for a particular reason)
13325
    self.needed_locks = {
13326
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13327
      }
13328

    
13329
  def Exec(self, feedback_fn):
13330
    if self.expandnames_calls < 1:
13331
      raise errors.ProgrammerError("ExpandNames was not called")
13332

    
13333
    if self.op.notify_exec:
13334
      self._Notify(False, constants.JQT_EXEC, None)
13335

    
13336
    self.LogInfo("Executing")
13337

    
13338
    if self.op.log_messages:
13339
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13340
      for idx, msg in enumerate(self.op.log_messages):
13341
        self.LogInfo("Sending log message %s", idx + 1)
13342
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13343
        # Report how many test messages have been sent
13344
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13345

    
13346
    if self.op.fail:
13347
      raise errors.OpExecError("Opcode failure was requested")
13348

    
13349
    return True
13350

    
13351

    
13352
class IAllocator(object):
13353
  """IAllocator framework.
13354

13355
  An IAllocator instance has three sets of attributes:
13356
    - cfg that is needed to query the cluster
13357
    - input data (all members of the _KEYS class attribute are required)
13358
    - four buffer attributes (in|out_data|text), that represent the
13359
      input (to the external script) in text and data structure format,
13360
      and the output from it, again in two formats
13361
    - the result variables from the script (success, info, nodes) for
13362
      easy usage
13363

13364
  """
13365
  # pylint: disable=R0902
13366
  # lots of instance attributes
13367

    
13368
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13369
    self.cfg = cfg
13370
    self.rpc = rpc_runner
13371
    # init buffer variables
13372
    self.in_text = self.out_text = self.in_data = self.out_data = None
13373
    # init all input fields so that pylint is happy
13374
    self.mode = mode
13375
    self.memory = self.disks = self.disk_template = None
13376
    self.os = self.tags = self.nics = self.vcpus = None
13377
    self.hypervisor = None
13378
    self.relocate_from = None
13379
    self.name = None
13380
    self.instances = None
13381
    self.evac_mode = None
13382
    self.target_groups = []
13383
    # computed fields
13384
    self.required_nodes = None
13385
    # init result fields
13386
    self.success = self.info = self.result = None
13387

    
13388
    try:
13389
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13390
    except KeyError:
13391
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13392
                                   " IAllocator" % self.mode)
13393

    
13394
    keyset = [n for (n, _) in keydata]
13395

    
13396
    for key in kwargs:
13397
      if key not in keyset:
13398
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13399
                                     " IAllocator" % key)
13400
      setattr(self, key, kwargs[key])
13401

    
13402
    for key in keyset:
13403
      if key not in kwargs:
13404
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13405
                                     " IAllocator" % key)
13406
    self._BuildInputData(compat.partial(fn, self), keydata)
13407

    
13408
  def _ComputeClusterData(self):
13409
    """Compute the generic allocator input data.
13410

13411
    This is the data that is independent of the actual operation.
13412

13413
    """
13414
    cfg = self.cfg
13415
    cluster_info = cfg.GetClusterInfo()
13416
    # cluster data
13417
    data = {
13418
      "version": constants.IALLOCATOR_VERSION,
13419
      "cluster_name": cfg.GetClusterName(),
13420
      "cluster_tags": list(cluster_info.GetTags()),
13421
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13422
      # we don't have job IDs
13423
      }
13424
    ninfo = cfg.GetAllNodesInfo()
13425
    iinfo = cfg.GetAllInstancesInfo().values()
13426
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13427

    
13428
    # node data
13429
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13430

    
13431
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13432
      hypervisor_name = self.hypervisor
13433
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13434
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13435
    else:
13436
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13437

    
13438
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13439
                                        hypervisor_name)
13440
    node_iinfo = \
13441
      self.rpc.call_all_instances_info(node_list,
13442
                                       cluster_info.enabled_hypervisors)
13443

    
13444
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13445

    
13446
    config_ndata = self._ComputeBasicNodeData(ninfo)
13447
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13448
                                                 i_list, config_ndata)
13449
    assert len(data["nodes"]) == len(ninfo), \
13450
        "Incomplete node data computed"
13451

    
13452
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13453

    
13454
    self.in_data = data
13455
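  # Rough shape of self.in_data after this method (illustrative excerpt only):
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [...], "enabled_hypervisors": ["xen-pvm"],
  #    "nodegroups": {...}, "nodes": {...}, "instances": {...}}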

    
13456
  @staticmethod
13457
  def _ComputeNodeGroupData(cfg):
13458
    """Compute node groups data.
13459

13460
    """
13461
    ng = dict((guuid, {
13462
      "name": gdata.name,
13463
      "alloc_policy": gdata.alloc_policy,
13464
      })
13465
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13466

    
13467
    return ng
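    # Example of the returned mapping (the group UUID is made up):
    #   {"uuid-of-group": {"name": "default", "alloc_policy": "preferred"}}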

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute static (configuration-based) node data.

    @rtype: dict
    @returns: a dict mapping node names to dicts with the config-based
        node attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute dynamic (runtime) node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # build the dynamic node result from the collected values
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
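    # For online, non-drained nodes each entry now combines the static keys
    # from _ComputeBasicNodeData with the dynamic ones gathered above, e.g.
    # (illustrative values only):
    #   {"total_memory": 4096, "reserved_memory": 512, "free_memory": 1024,
    #    "total_disk": 102400, "free_disk": 51200, "total_cpus": 4,
    #    "i_pri_memory": 2048, "i_pri_up_memory": 1024,
    #    "tags": [], "primary_ip": "192.0.2.10", ...}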

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
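    # Each value is a per-instance dict along these lines (all values are
    # illustrative, assuming a single-node "plain" instance):
    #   {"tags": [], "admin_state": "up", "vcpus": 1, "memory": 512,
    #    "os": "debootstrap+default", "nodes": ["node1.example.com"],
    #    "nics": [...], "disks": [{"size": 1024, "mode": "rw"}],
    #    "disk_template": "plain", "hypervisor": "xen-pvm",
    #    "disk_space_total": 1024}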

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
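    # Sketch of the resulting "request" section for a relocation (names and
    # sizes are made up):
    #   {"type": "relocate", "name": "instance1.example.com",
    #    "disk_space_total": 1024, "required_nodes": 1,
    #    "relocate_from": ["node2.example.com"]}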

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
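  # Each _MODE_DATA entry maps an allocator mode to a triple: the method that
  # builds the mode-specific "request" dict, the list of (keyword, type check)
  # pairs accepted by __init__ for that mode, and the validator applied to the
  # "result" field of the allocator's reply in _ValidateResult.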

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result))

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
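    # A well-formed (deserialized) reply therefore looks roughly like this,
    # with illustrative values:
    #   {"success": True, "info": "allocation successful",
    #    "result": ["node1.example.com", "node3.example.com"]}
    # where "result" is checked against the mode's validator (e.g. ht.TList
    # for allocations and relocations).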

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
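    # Worked example (hypothetical data):
    #   _NodesToGroups({"node1": "uuid-a"},
    #                  {"uuid-a": {"name": "default"}},
    #                  ["node1", "unknown-node"]) == ["default"]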


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
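# For example, _GetQueryImplementation(constants.QR_INSTANCE) returns the
# _InstanceQuery class, while an unknown resource name raises OpPrereqError.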