Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 1db993d5

History | View | Annotate | Download (496.3 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70
# States of instance
71
INSTANCE_UP = [constants.ADMINST_UP]
72
INSTANCE_DOWN = [constants.ADMINST_DOWN]
73
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
76

    
77

    
78
class ResultWithJobs:
79
  """Data container for LU results with jobs.
80

81
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83
  contained in the C{jobs} attribute and include the job IDs in the opcode
84
  result.
85

86
  """
87
  def __init__(self, jobs, **kwargs):
88
    """Initializes this class.
89

90
    Additional return values can be specified as keyword arguments.
91

92
    @type jobs: list of lists of L{opcode.OpCode}
93
    @param jobs: A list of lists of opcode objects
94

95
    """
96
    self.jobs = jobs
97
    self.other = kwargs
98

    
99

    
100
class LogicalUnit(object):
101
  """Logical Unit base class.
102

103
  Subclasses must follow these rules:
104
    - implement ExpandNames
105
    - implement CheckPrereq (except when tasklets are used)
106
    - implement Exec (except when tasklets are used)
107
    - implement BuildHooksEnv
108
    - implement BuildHooksNodes
109
    - redefine HPATH and HTYPE
110
    - optionally redefine their run requirements:
111
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112

113
  Note that all commands require root permissions.
114

115
  @ivar dry_run_result: the value (if any) that will be returned to the caller
116
      in dry-run mode (signalled by opcode dry_run parameter)
117

118
  """
119
  HPATH = None
120
  HTYPE = None
121
  REQ_BGL = True
122

    
123
  def __init__(self, processor, op, context, rpc_runner):
124
    """Constructor for LogicalUnit.
125

126
    This needs to be overridden in derived classes in order to check op
127
    validity.
128

129
    """
130
    self.proc = processor
131
    self.op = op
132
    self.cfg = context.cfg
133
    self.glm = context.glm
134
    # readability alias
135
    self.owned_locks = context.glm.list_owned
136
    self.context = context
137
    self.rpc = rpc_runner
138
    # Dicts used to declare locking needs to mcpu
139
    self.needed_locks = None
140
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
141
    self.add_locks = {}
142
    self.remove_locks = {}
143
    # Used to force good behavior when calling helper functions
144
    self.recalculate_locks = {}
145
    # logging
146
    self.Log = processor.Log # pylint: disable=C0103
147
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
148
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
149
    self.LogStep = processor.LogStep # pylint: disable=C0103
150
    # support for dry-run
151
    self.dry_run_result = None
152
    # support for generic debug attribute
153
    if (not hasattr(self.op, "debug_level") or
154
        not isinstance(self.op.debug_level, int)):
155
      self.op.debug_level = 0
156

    
157
    # Tasklets
158
    self.tasklets = None
159

    
160
    # Validate opcode parameters and set defaults
161
    self.op.Validate(True)
162

    
163
    self.CheckArguments()
164

    
165
  def CheckArguments(self):
166
    """Check syntactic validity for the opcode arguments.
167

168
    This method is for doing a simple syntactic check and ensure
169
    validity of opcode parameters, without any cluster-related
170
    checks. While the same can be accomplished in ExpandNames and/or
171
    CheckPrereq, doing these separate is better because:
172

173
      - ExpandNames is left as as purely a lock-related function
174
      - CheckPrereq is run after we have acquired locks (and possible
175
        waited for them)
176

177
    The function is allowed to change the self.op attribute so that
178
    later methods can no longer worry about missing parameters.
179

180
    """
181
    pass
182

    
183
  def ExpandNames(self):
184
    """Expand names for this LU.
185

186
    This method is called before starting to execute the opcode, and it should
187
    update all the parameters of the opcode to their canonical form (e.g. a
188
    short node name must be fully expanded after this method has successfully
189
    completed). This way locking, hooks, logging, etc. can work correctly.
190

191
    LUs which implement this method must also populate the self.needed_locks
192
    member, as a dict with lock levels as keys, and a list of needed lock names
193
    as values. Rules:
194

195
      - use an empty dict if you don't need any lock
196
      - if you don't need any lock at a particular level omit that level
197
      - don't put anything for the BGL level
198
      - if you want all locks at a level use locking.ALL_SET as a value
199

200
    If you need to share locks (rather than acquire them exclusively) at one
201
    level you can modify self.share_locks, setting a true value (usually 1) for
202
    that level. By default locks are not shared.
203

204
    This function can also define a list of tasklets, which then will be
205
    executed in order instead of the usual LU-level CheckPrereq and Exec
206
    functions, if those are not defined by the LU.
207

208
    Examples::
209

210
      # Acquire all nodes and one instance
211
      self.needed_locks = {
212
        locking.LEVEL_NODE: locking.ALL_SET,
213
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
214
      }
215
      # Acquire just two nodes
216
      self.needed_locks = {
217
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
218
      }
219
      # Acquire no locks
220
      self.needed_locks = {} # No, you can't leave it to the default value None
221

222
    """
223
    # The implementation of this method is mandatory only if the new LU is
224
    # concurrent, so that old LUs don't need to be changed all at the same
225
    # time.
226
    if self.REQ_BGL:
227
      self.needed_locks = {} # Exclusive LUs don't need locks.
228
    else:
229
      raise NotImplementedError
230

    
231
  def DeclareLocks(self, level):
232
    """Declare LU locking needs for a level
233

234
    While most LUs can just declare their locking needs at ExpandNames time,
235
    sometimes there's the need to calculate some locks after having acquired
236
    the ones before. This function is called just before acquiring locks at a
237
    particular level, but after acquiring the ones at lower levels, and permits
238
    such calculations. It can be used to modify self.needed_locks, and by
239
    default it does nothing.
240

241
    This function is only called if you have something already set in
242
    self.needed_locks for the level.
243

244
    @param level: Locking level which is going to be locked
245
    @type level: member of ganeti.locking.LEVELS
246

247
    """
248

    
249
  def CheckPrereq(self):
250
    """Check prerequisites for this LU.
251

252
    This method should check that the prerequisites for the execution
253
    of this LU are fulfilled. It can do internode communication, but
254
    it should be idempotent - no cluster or system changes are
255
    allowed.
256

257
    The method should raise errors.OpPrereqError in case something is
258
    not fulfilled. Its return value is ignored.
259

260
    This method should also update all the parameters of the opcode to
261
    their canonical form if it hasn't been done by ExpandNames before.
262

263
    """
264
    if self.tasklets is not None:
265
      for (idx, tl) in enumerate(self.tasklets):
266
        logging.debug("Checking prerequisites for tasklet %s/%s",
267
                      idx + 1, len(self.tasklets))
268
        tl.CheckPrereq()
269
    else:
270
      pass
271

    
272
  def Exec(self, feedback_fn):
273
    """Execute the LU.
274

275
    This method should implement the actual work. It should raise
276
    errors.OpExecError for failures that are somewhat dealt with in
277
    code, or expected.
278

279
    """
280
    if self.tasklets is not None:
281
      for (idx, tl) in enumerate(self.tasklets):
282
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
283
        tl.Exec(feedback_fn)
284
    else:
285
      raise NotImplementedError
286

    
287
  def BuildHooksEnv(self):
288
    """Build hooks environment for this LU.
289

290
    @rtype: dict
291
    @return: Dictionary containing the environment that will be used for
292
      running the hooks for this LU. The keys of the dict must not be prefixed
293
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294
      will extend the environment with additional variables. If no environment
295
      should be defined, an empty dictionary should be returned (not C{None}).
296
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
297
      will not be called.
298

299
    """
300
    raise NotImplementedError
301

    
302
  def BuildHooksNodes(self):
303
    """Build list of nodes to run LU's hooks.
304

305
    @rtype: tuple; (list, list)
306
    @return: Tuple containing a list of node names on which the hook
307
      should run before the execution and a list of node names on which the
308
      hook should run after the execution. No nodes should be returned as an
309
      empty list (and not None).
310
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311
      will not be called.
312

313
    """
314
    raise NotImplementedError
315

    
316
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317
    """Notify the LU about the results of its hooks.
318

319
    This method is called every time a hooks phase is executed, and notifies
320
    the Logical Unit about the hooks' result. The LU can then use it to alter
321
    its result based on the hooks.  By default the method does nothing and the
322
    previous result is passed back unchanged but any LU can define it if it
323
    wants to use the local cluster hook-scripts somehow.
324

325
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
326
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327
    @param hook_results: the results of the multi-node hooks rpc call
328
    @param feedback_fn: function used send feedback back to the caller
329
    @param lu_result: the previous Exec result this LU had, or None
330
        in the PRE phase
331
    @return: the new Exec result, based on the previous result
332
        and hook results
333

334
    """
335
    # API must be kept, thus we ignore the unused argument and could
336
    # be a function warnings
337
    # pylint: disable=W0613,R0201
338
    return lu_result
339

    
340
  def _ExpandAndLockInstance(self):
341
    """Helper function to expand and lock an instance.
342

343
    Many LUs that work on an instance take its name in self.op.instance_name
344
    and need to expand it and then declare the expanded name for locking. This
345
    function does it, and then updates self.op.instance_name to the expanded
346
    name. It also initializes needed_locks as a dict, if this hasn't been done
347
    before.
348

349
    """
350
    if self.needed_locks is None:
351
      self.needed_locks = {}
352
    else:
353
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354
        "_ExpandAndLockInstance called with instance-level locks set"
355
    self.op.instance_name = _ExpandInstanceName(self.cfg,
356
                                                self.op.instance_name)
357
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
358

    
359
  def _LockInstancesNodes(self, primary_only=False,
360
                          level=locking.LEVEL_NODE):
361
    """Helper function to declare instances' nodes for locking.
362

363
    This function should be called after locking one or more instances to lock
364
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365
    with all primary or secondary nodes for instances already locked and
366
    present in self.needed_locks[locking.LEVEL_INSTANCE].
367

368
    It should be called from DeclareLocks, and for safety only works if
369
    self.recalculate_locks[locking.LEVEL_NODE] is set.
370

371
    In the future it may grow parameters to just lock some instance's nodes, or
372
    to just lock primaries or secondary nodes, if needed.
373

374
    If should be called in DeclareLocks in a way similar to::
375

376
      if level == locking.LEVEL_NODE:
377
        self._LockInstancesNodes()
378

379
    @type primary_only: boolean
380
    @param primary_only: only lock primary nodes of locked instances
381
    @param level: Which lock level to use for locking nodes
382

383
    """
384
    assert level in self.recalculate_locks, \
385
      "_LockInstancesNodes helper function called with no nodes to recalculate"
386

    
387
    # TODO: check if we're really been called with the instance locks held
388

    
389
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390
    # future we might want to have different behaviors depending on the value
391
    # of self.recalculate_locks[locking.LEVEL_NODE]
392
    wanted_nodes = []
393
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395
      wanted_nodes.append(instance.primary_node)
396
      if not primary_only:
397
        wanted_nodes.extend(instance.secondary_nodes)
398

    
399
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400
      self.needed_locks[level] = wanted_nodes
401
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402
      self.needed_locks[level].extend(wanted_nodes)
403
    else:
404
      raise errors.ProgrammerError("Unknown recalculation mode")
405

    
406
    del self.recalculate_locks[level]
407

    
408

    
409
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410
  """Simple LU which runs no hooks.
411

412
  This LU is intended as a parent for other LogicalUnits which will
413
  run no hooks, in order to reduce duplicate code.
414

415
  """
416
  HPATH = None
417
  HTYPE = None
418

    
419
  def BuildHooksEnv(self):
420
    """Empty BuildHooksEnv for NoHooksLu.
421

422
    This just raises an error.
423

424
    """
425
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
426

    
427
  def BuildHooksNodes(self):
428
    """Empty BuildHooksNodes for NoHooksLU.
429

430
    """
431
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
432

    
433

    
434
class Tasklet:
435
  """Tasklet base class.
436

437
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
439
  tasklets know nothing about locks.
440

441
  Subclasses must follow these rules:
442
    - Implement CheckPrereq
443
    - Implement Exec
444

445
  """
446
  def __init__(self, lu):
447
    self.lu = lu
448

    
449
    # Shortcuts
450
    self.cfg = lu.cfg
451
    self.rpc = lu.rpc
452

    
453
  def CheckPrereq(self):
454
    """Check prerequisites for this tasklets.
455

456
    This method should check whether the prerequisites for the execution of
457
    this tasklet are fulfilled. It can do internode communication, but it
458
    should be idempotent - no cluster or system changes are allowed.
459

460
    The method should raise errors.OpPrereqError in case something is not
461
    fulfilled. Its return value is ignored.
462

463
    This method should also update all parameters to their canonical form if it
464
    hasn't been done before.
465

466
    """
467
    pass
468

    
469
  def Exec(self, feedback_fn):
470
    """Execute the tasklet.
471

472
    This method should implement the actual work. It should raise
473
    errors.OpExecError for failures that are somewhat dealt with in code, or
474
    expected.
475

476
    """
477
    raise NotImplementedError
478

    
479

    
480
class _QueryBase:
481
  """Base for query utility classes.
482

483
  """
484
  #: Attribute holding field definitions
485
  FIELDS = None
486

    
487
  def __init__(self, qfilter, fields, use_locking):
488
    """Initializes this class.
489

490
    """
491
    self.use_locking = use_locking
492

    
493
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
494
                             namefield="name")
495
    self.requested_data = self.query.RequestedData()
496
    self.names = self.query.RequestedNames()
497

    
498
    # Sort only if no names were requested
499
    self.sort_by_name = not self.names
500

    
501
    self.do_locking = None
502
    self.wanted = None
503

    
504
  def _GetNames(self, lu, all_names, lock_level):
505
    """Helper function to determine names asked for in the query.
506

507
    """
508
    if self.do_locking:
509
      names = lu.owned_locks(lock_level)
510
    else:
511
      names = all_names
512

    
513
    if self.wanted == locking.ALL_SET:
514
      assert not self.names
515
      # caller didn't specify names, so ordering is not important
516
      return utils.NiceSort(names)
517

    
518
    # caller specified names and we must keep the same order
519
    assert self.names
520
    assert not self.do_locking or lu.glm.is_owned(lock_level)
521

    
522
    missing = set(self.wanted).difference(names)
523
    if missing:
524
      raise errors.OpExecError("Some items were removed before retrieving"
525
                               " their data: %s" % missing)
526

    
527
    # Return expanded names
528
    return self.wanted
529

    
530
  def ExpandNames(self, lu):
531
    """Expand names for this query.
532

533
    See L{LogicalUnit.ExpandNames}.
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def DeclareLocks(self, lu, level):
539
    """Declare locks for this query.
540

541
    See L{LogicalUnit.DeclareLocks}.
542

543
    """
544
    raise NotImplementedError()
545

    
546
  def _GetQueryData(self, lu):
547
    """Collects all data for this query.
548

549
    @return: Query data object
550

551
    """
552
    raise NotImplementedError()
553

    
554
  def NewStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559
                                  sort_by_name=self.sort_by_name)
560

    
561
  def OldStyleQuery(self, lu):
562
    """Collect data and execute query.
563

564
    """
565
    return self.query.OldStyleQuery(self._GetQueryData(lu),
566
                                    sort_by_name=self.sort_by_name)
567

    
568

    
569
def _ShareAll():
570
  """Returns a dict declaring all lock levels shared.
571

572
  """
573
  return dict.fromkeys(locking.LEVELS, 1)
574

    
575

    
576
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
577
  """Checks if the owned node groups are still correct for an instance.
578

579
  @type cfg: L{config.ConfigWriter}
580
  @param cfg: The cluster configuration
581
  @type instance_name: string
582
  @param instance_name: Instance name
583
  @type owned_groups: set or frozenset
584
  @param owned_groups: List of currently owned node groups
585

586
  """
587
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
588

    
589
  if not owned_groups.issuperset(inst_groups):
590
    raise errors.OpPrereqError("Instance %s's node groups changed since"
591
                               " locks were acquired, current groups are"
592
                               " are '%s', owning groups '%s'; retry the"
593
                               " operation" %
594
                               (instance_name,
595
                                utils.CommaJoin(inst_groups),
596
                                utils.CommaJoin(owned_groups)),
597
                               errors.ECODE_STATE)
598

    
599
  return inst_groups
600

    
601

    
602
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
603
  """Checks if the instances in a node group are still correct.
604

605
  @type cfg: L{config.ConfigWriter}
606
  @param cfg: The cluster configuration
607
  @type group_uuid: string
608
  @param group_uuid: Node group UUID
609
  @type owned_instances: set or frozenset
610
  @param owned_instances: List of currently owned instances
611

612
  """
613
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
614
  if owned_instances != wanted_instances:
615
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
616
                               " locks were acquired, wanted '%s', have '%s';"
617
                               " retry the operation" %
618
                               (group_uuid,
619
                                utils.CommaJoin(wanted_instances),
620
                                utils.CommaJoin(owned_instances)),
621
                               errors.ECODE_STATE)
622

    
623
  return wanted_instances
624

    
625

    
626
def _SupportsOob(cfg, node):
627
  """Tells if node supports OOB.
628

629
  @type cfg: L{config.ConfigWriter}
630
  @param cfg: The cluster configuration
631
  @type node: L{objects.Node}
632
  @param node: The node
633
  @return: The OOB script if supported or an empty string otherwise
634

635
  """
636
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
637

    
638

    
639
def _GetWantedNodes(lu, nodes):
640
  """Returns list of checked and expanded node names.
641

642
  @type lu: L{LogicalUnit}
643
  @param lu: the logical unit on whose behalf we execute
644
  @type nodes: list
645
  @param nodes: list of node names or None for all nodes
646
  @rtype: list
647
  @return: the list of nodes, sorted
648
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
649

650
  """
651
  if nodes:
652
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
653

    
654
  return utils.NiceSort(lu.cfg.GetNodeList())
655

    
656

    
657
def _GetWantedInstances(lu, instances):
658
  """Returns list of checked and expanded instance names.
659

660
  @type lu: L{LogicalUnit}
661
  @param lu: the logical unit on whose behalf we execute
662
  @type instances: list
663
  @param instances: list of instance names or None for all instances
664
  @rtype: list
665
  @return: the list of instances, sorted
666
  @raise errors.OpPrereqError: if the instances parameter is wrong type
667
  @raise errors.OpPrereqError: if any of the passed instances is not found
668

669
  """
670
  if instances:
671
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
672
  else:
673
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
674
  return wanted
675

    
676

    
677
def _GetUpdatedParams(old_params, update_dict,
678
                      use_default=True, use_none=False):
679
  """Return the new version of a parameter dictionary.
680

681
  @type old_params: dict
682
  @param old_params: old parameters
683
  @type update_dict: dict
684
  @param update_dict: dict containing new parameter values, or
685
      constants.VALUE_DEFAULT to reset the parameter to its default
686
      value
687
  @param use_default: boolean
688
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
689
      values as 'to be deleted' values
690
  @param use_none: boolean
691
  @type use_none: whether to recognise C{None} values as 'to be
692
      deleted' values
693
  @rtype: dict
694
  @return: the new parameter dictionary
695

696
  """
697
  params_copy = copy.deepcopy(old_params)
698
  for key, val in update_dict.iteritems():
699
    if ((use_default and val == constants.VALUE_DEFAULT) or
700
        (use_none and val is None)):
701
      try:
702
        del params_copy[key]
703
      except KeyError:
704
        pass
705
    else:
706
      params_copy[key] = val
707
  return params_copy
708

    
709

    
710
def _ReleaseLocks(lu, level, names=None, keep=None):
711
  """Releases locks owned by an LU.
712

713
  @type lu: L{LogicalUnit}
714
  @param level: Lock level
715
  @type names: list or None
716
  @param names: Names of locks to release
717
  @type keep: list or None
718
  @param keep: Names of locks to retain
719

720
  """
721
  assert not (keep is not None and names is not None), \
722
         "Only one of the 'names' and the 'keep' parameters can be given"
723

    
724
  if names is not None:
725
    should_release = names.__contains__
726
  elif keep:
727
    should_release = lambda name: name not in keep
728
  else:
729
    should_release = None
730

    
731
  owned = lu.owned_locks(level)
732
  if not owned:
733
    # Not owning any lock at this level, do nothing
734
    pass
735

    
736
  elif should_release:
737
    retain = []
738
    release = []
739

    
740
    # Determine which locks to release
741
    for name in owned:
742
      if should_release(name):
743
        release.append(name)
744
      else:
745
        retain.append(name)
746

    
747
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
748

    
749
    # Release just some locks
750
    lu.glm.release(level, names=release)
751

    
752
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
753
  else:
754
    # Release everything
755
    lu.glm.release(level)
756

    
757
    assert not lu.glm.is_owned(level), "No locks should be owned"
758

    
759

    
760
def _MapInstanceDisksToNodes(instances):
761
  """Creates a map from (node, volume) to instance name.
762

763
  @type instances: list of L{objects.Instance}
764
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
765

766
  """
767
  return dict(((node, vol), inst.name)
768
              for inst in instances
769
              for (node, vols) in inst.MapLVsByNode().items()
770
              for vol in vols)
771

    
772

    
773
def _RunPostHook(lu, node_name):
774
  """Runs the post-hook for an opcode on a single node.
775

776
  """
777
  hm = lu.proc.BuildHooksManager(lu)
778
  try:
779
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
780
  except:
781
    # pylint: disable=W0702
782
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
783

    
784

    
785
def _CheckOutputFields(static, dynamic, selected):
786
  """Checks whether all selected fields are valid.
787

788
  @type static: L{utils.FieldSet}
789
  @param static: static fields set
790
  @type dynamic: L{utils.FieldSet}
791
  @param dynamic: dynamic fields set
792

793
  """
794
  f = utils.FieldSet()
795
  f.Extend(static)
796
  f.Extend(dynamic)
797

    
798
  delta = f.NonMatching(selected)
799
  if delta:
800
    raise errors.OpPrereqError("Unknown output fields selected: %s"
801
                               % ",".join(delta), errors.ECODE_INVAL)
802

    
803

    
804
def _CheckGlobalHvParams(params):
805
  """Validates that given hypervisor params are not global ones.
806

807
  This will ensure that instances don't get customised versions of
808
  global params.
809

810
  """
811
  used_globals = constants.HVC_GLOBALS.intersection(params)
812
  if used_globals:
813
    msg = ("The following hypervisor parameters are global and cannot"
814
           " be customized at instance level, please modify them at"
815
           " cluster level: %s" % utils.CommaJoin(used_globals))
816
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
817

    
818

    
819
def _CheckNodeOnline(lu, node, msg=None):
820
  """Ensure that a given node is online.
821

822
  @param lu: the LU on behalf of which we make the check
823
  @param node: the node to check
824
  @param msg: if passed, should be a message to replace the default one
825
  @raise errors.OpPrereqError: if the node is offline
826

827
  """
828
  if msg is None:
829
    msg = "Can't use offline node"
830
  if lu.cfg.GetNodeInfo(node).offline:
831
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
832

    
833

    
834
def _CheckNodeNotDrained(lu, node):
835
  """Ensure that a given node is not drained.
836

837
  @param lu: the LU on behalf of which we make the check
838
  @param node: the node to check
839
  @raise errors.OpPrereqError: if the node is drained
840

841
  """
842
  if lu.cfg.GetNodeInfo(node).drained:
843
    raise errors.OpPrereqError("Can't use drained node %s" % node,
844
                               errors.ECODE_STATE)
845

    
846

    
847
def _CheckNodeVmCapable(lu, node):
848
  """Ensure that a given node is vm capable.
849

850
  @param lu: the LU on behalf of which we make the check
851
  @param node: the node to check
852
  @raise errors.OpPrereqError: if the node is not vm capable
853

854
  """
855
  if not lu.cfg.GetNodeInfo(node).vm_capable:
856
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
857
                               errors.ECODE_STATE)
858

    
859

    
860
def _CheckNodeHasOS(lu, node, os_name, force_variant):
861
  """Ensure that a node supports a given OS.
862

863
  @param lu: the LU on behalf of which we make the check
864
  @param node: the node to check
865
  @param os_name: the OS to query about
866
  @param force_variant: whether to ignore variant errors
867
  @raise errors.OpPrereqError: if the node is not supporting the OS
868

869
  """
870
  result = lu.rpc.call_os_get(node, os_name)
871
  result.Raise("OS '%s' not in supported OS list for node %s" %
872
               (os_name, node),
873
               prereq=True, ecode=errors.ECODE_INVAL)
874
  if not force_variant:
875
    _CheckOSVariant(result.payload, os_name)
876

    
877

    
878
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
879
  """Ensure that a node has the given secondary ip.
880

881
  @type lu: L{LogicalUnit}
882
  @param lu: the LU on behalf of which we make the check
883
  @type node: string
884
  @param node: the node to check
885
  @type secondary_ip: string
886
  @param secondary_ip: the ip to check
887
  @type prereq: boolean
888
  @param prereq: whether to throw a prerequisite or an execute error
889
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
890
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
891

892
  """
893
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
894
  result.Raise("Failure checking secondary ip on node %s" % node,
895
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
896
  if not result.payload:
897
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
898
           " please fix and re-run this command" % secondary_ip)
899
    if prereq:
900
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
901
    else:
902
      raise errors.OpExecError(msg)
903

    
904

    
905
def _GetClusterDomainSecret():
906
  """Reads the cluster domain secret.
907

908
  """
909
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
910
                               strict=True)
911

    
912

    
913
def _CheckInstanceState(lu, instance, req_states, msg=None):
914
  """Ensure that an instance is in one of the required states.
915

916
  @param lu: the LU on behalf of which we make the check
917
  @param instance: the instance to check
918
  @param msg: if passed, should be a message to replace the default one
919
  @raise errors.OpPrereqError: if the instance is not in the required state
920

921
  """
922
  if msg is None:
923
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
924
  if instance.admin_state not in req_states:
925
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
926
                               (instance, instance.admin_state, msg),
927
                               errors.ECODE_STATE)
928

    
929
  if constants.ADMINST_UP not in req_states:
930
    pnode = instance.primary_node
931
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
932
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
933
                prereq=True, ecode=errors.ECODE_ENVIRON)
934

    
935
    if instance.name in ins_l.payload:
936
      raise errors.OpPrereqError("Instance %s is running, %s" %
937
                                 (instance.name, msg), errors.ECODE_STATE)
938

    
939

    
940
def _ExpandItemName(fn, name, kind):
941
  """Expand an item name.
942

943
  @param fn: the function to use for expansion
944
  @param name: requested item name
945
  @param kind: text description ('Node' or 'Instance')
946
  @return: the resolved (full) name
947
  @raise errors.OpPrereqError: if the item is not found
948

949
  """
950
  full_name = fn(name)
951
  if full_name is None:
952
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
953
                               errors.ECODE_NOENT)
954
  return full_name
955

    
956

    
957
def _ExpandNodeName(cfg, name):
958
  """Wrapper over L{_ExpandItemName} for nodes."""
959
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
960

    
961

    
962
def _ExpandInstanceName(cfg, name):
963
  """Wrapper over L{_ExpandItemName} for instance."""
964
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
965

    
966

    
967
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
968
                          minmem, maxmem, vcpus, nics, disk_template, disks,
969
                          bep, hvp, hypervisor_name, tags):
970
  """Builds instance related env variables for hooks
971

972
  This builds the hook environment from individual variables.
973

974
  @type name: string
975
  @param name: the name of the instance
976
  @type primary_node: string
977
  @param primary_node: the name of the instance's primary node
978
  @type secondary_nodes: list
979
  @param secondary_nodes: list of secondary nodes as strings
980
  @type os_type: string
981
  @param os_type: the name of the instance's OS
982
  @type status: string
983
  @param status: the desired status of the instance
984
  @type minmem: string
985
  @param minmem: the minimum memory size of the instance
986
  @type maxmem: string
987
  @param maxmem: the maximum memory size of the instance
988
  @type vcpus: string
989
  @param vcpus: the count of VCPUs the instance has
990
  @type nics: list
991
  @param nics: list of tuples (ip, mac, mode, link) representing
992
      the NICs the instance has
993
  @type disk_template: string
994
  @param disk_template: the disk template of the instance
995
  @type disks: list
996
  @param disks: the list of (size, mode) pairs
997
  @type bep: dict
998
  @param bep: the backend parameters for the instance
999
  @type hvp: dict
1000
  @param hvp: the hypervisor parameters for the instance
1001
  @type hypervisor_name: string
1002
  @param hypervisor_name: the hypervisor for the instance
1003
  @type tags: list
1004
  @param tags: list of instance tags as strings
1005
  @rtype: dict
1006
  @return: the hook environment for this instance
1007

1008
  """
1009
  env = {
1010
    "OP_TARGET": name,
1011
    "INSTANCE_NAME": name,
1012
    "INSTANCE_PRIMARY": primary_node,
1013
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1014
    "INSTANCE_OS_TYPE": os_type,
1015
    "INSTANCE_STATUS": status,
1016
    "INSTANCE_MINMEM": minmem,
1017
    "INSTANCE_MAXMEM": maxmem,
1018
    # TODO(2.7) remove deprecated "memory" value
1019
    "INSTANCE_MEMORY": maxmem,
1020
    "INSTANCE_VCPUS": vcpus,
1021
    "INSTANCE_DISK_TEMPLATE": disk_template,
1022
    "INSTANCE_HYPERVISOR": hypervisor_name,
1023
  }
1024
  if nics:
1025
    nic_count = len(nics)
1026
    for idx, (ip, mac, mode, link) in enumerate(nics):
1027
      if ip is None:
1028
        ip = ""
1029
      env["INSTANCE_NIC%d_IP" % idx] = ip
1030
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1031
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1032
      env["INSTANCE_NIC%d_LINK" % idx] = link
1033
      if mode == constants.NIC_MODE_BRIDGED:
1034
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1035
  else:
1036
    nic_count = 0
1037

    
1038
  env["INSTANCE_NIC_COUNT"] = nic_count
1039

    
1040
  if disks:
1041
    disk_count = len(disks)
1042
    for idx, (size, mode) in enumerate(disks):
1043
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1044
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1045
  else:
1046
    disk_count = 0
1047

    
1048
  env["INSTANCE_DISK_COUNT"] = disk_count
1049

    
1050
  if not tags:
1051
    tags = []
1052

    
1053
  env["INSTANCE_TAGS"] = " ".join(tags)
1054

    
1055
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1056
    for key, value in source.items():
1057
      env["INSTANCE_%s_%s" % (kind, key)] = value
1058

    
1059
  return env
1060

    
1061

    
1062
def _NICListToTuple(lu, nics):
1063
  """Build a list of nic information tuples.
1064

1065
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1066
  value in LUInstanceQueryData.
1067

1068
  @type lu:  L{LogicalUnit}
1069
  @param lu: the logical unit on whose behalf we execute
1070
  @type nics: list of L{objects.NIC}
1071
  @param nics: list of nics to convert to hooks tuples
1072

1073
  """
1074
  hooks_nics = []
1075
  cluster = lu.cfg.GetClusterInfo()
1076
  for nic in nics:
1077
    ip = nic.ip
1078
    mac = nic.mac
1079
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1080
    mode = filled_params[constants.NIC_MODE]
1081
    link = filled_params[constants.NIC_LINK]
1082
    hooks_nics.append((ip, mac, mode, link))
1083
  return hooks_nics
1084

    
1085

    
1086
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1087
  """Builds instance related env variables for hooks from an object.
1088

1089
  @type lu: L{LogicalUnit}
1090
  @param lu: the logical unit on whose behalf we execute
1091
  @type instance: L{objects.Instance}
1092
  @param instance: the instance for which we should build the
1093
      environment
1094
  @type override: dict
1095
  @param override: dictionary with key/values that will override
1096
      our values
1097
  @rtype: dict
1098
  @return: the hook environment dictionary
1099

1100
  """
1101
  cluster = lu.cfg.GetClusterInfo()
1102
  bep = cluster.FillBE(instance)
1103
  hvp = cluster.FillHV(instance)
1104
  args = {
1105
    "name": instance.name,
1106
    "primary_node": instance.primary_node,
1107
    "secondary_nodes": instance.secondary_nodes,
1108
    "os_type": instance.os,
1109
    "status": instance.admin_state,
1110
    "maxmem": bep[constants.BE_MAXMEM],
1111
    "minmem": bep[constants.BE_MINMEM],
1112
    "vcpus": bep[constants.BE_VCPUS],
1113
    "nics": _NICListToTuple(lu, instance.nics),
1114
    "disk_template": instance.disk_template,
1115
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1116
    "bep": bep,
1117
    "hvp": hvp,
1118
    "hypervisor_name": instance.hypervisor,
1119
    "tags": instance.tags,
1120
  }
1121
  if override:
1122
    args.update(override)
1123
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1124

    
1125

    
1126
def _AdjustCandidatePool(lu, exceptions):
1127
  """Adjust the candidate pool after node operations.
1128

1129
  """
1130
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1131
  if mod_list:
1132
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1133
               utils.CommaJoin(node.name for node in mod_list))
1134
    for name in mod_list:
1135
      lu.context.ReaddNode(name)
1136
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1137
  if mc_now > mc_max:
1138
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1139
               (mc_now, mc_max))
1140

    
1141

    
1142
def _DecideSelfPromotion(lu, exceptions=None):
1143
  """Decide whether I should promote myself as a master candidate.
1144

1145
  """
1146
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1147
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1148
  # the new node will increase mc_max with one, so:
1149
  mc_should = min(mc_should + 1, cp_size)
1150
  return mc_now < mc_should
1151

    
1152

    
1153
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1154
  """Check that the brigdes needed by a list of nics exist.
1155

1156
  """
1157
  cluster = lu.cfg.GetClusterInfo()
1158
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1159
  brlist = [params[constants.NIC_LINK] for params in paramslist
1160
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1161
  if brlist:
1162
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1163
    result.Raise("Error checking bridges on destination node '%s'" %
1164
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1165

    
1166

    
1167
def _CheckInstanceBridgesExist(lu, instance, node=None):
1168
  """Check that the brigdes needed by an instance exist.
1169

1170
  """
1171
  if node is None:
1172
    node = instance.primary_node
1173
  _CheckNicsBridgesExist(lu, instance.nics, node)
1174

    
1175

    
1176
def _CheckOSVariant(os_obj, name):
1177
  """Check whether an OS name conforms to the os variants specification.
1178

1179
  @type os_obj: L{objects.OS}
1180
  @param os_obj: OS object to check
1181
  @type name: string
1182
  @param name: OS name passed by the user, to check for validity
1183

1184
  """
1185
  variant = objects.OS.GetVariant(name)
1186
  if not os_obj.supported_variants:
1187
    if variant:
1188
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1189
                                 " passed)" % (os_obj.name, variant),
1190
                                 errors.ECODE_INVAL)
1191
    return
1192
  if not variant:
1193
    raise errors.OpPrereqError("OS name must include a variant",
1194
                               errors.ECODE_INVAL)
1195

    
1196
  if variant not in os_obj.supported_variants:
1197
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1198

    
1199

    
1200
def _GetNodeInstancesInner(cfg, fn):
1201
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1202

    
1203

    
1204
def _GetNodeInstances(cfg, node_name):
1205
  """Returns a list of all primary and secondary instances on a node.
1206

1207
  """
1208

    
1209
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1210

    
1211

    
1212
def _GetNodePrimaryInstances(cfg, node_name):
1213
  """Returns primary instances on a node.
1214

1215
  """
1216
  return _GetNodeInstancesInner(cfg,
1217
                                lambda inst: node_name == inst.primary_node)
1218

    
1219

    
1220
def _GetNodeSecondaryInstances(cfg, node_name):
1221
  """Returns secondary instances on a node.
1222

1223
  """
1224
  return _GetNodeInstancesInner(cfg,
1225
                                lambda inst: node_name in inst.secondary_nodes)
1226

    
1227

    
1228
def _GetStorageTypeArgs(cfg, storage_type):
1229
  """Returns the arguments for a storage type.
1230

1231
  """
1232
  # Special case for file storage
1233
  if storage_type == constants.ST_FILE:
1234
    # storage.FileStorage wants a list of storage directories
1235
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1236

    
1237
  return []
1238

    
1239

    
1240
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1241
  faulty = []
1242

    
1243
  for dev in instance.disks:
1244
    cfg.SetDiskID(dev, node_name)
1245

    
1246
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1247
  result.Raise("Failed to get disk status from node %s" % node_name,
1248
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1249

    
1250
  for idx, bdev_status in enumerate(result.payload):
1251
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1252
      faulty.append(idx)
1253

    
1254
  return faulty
1255

    
1256

    
1257
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1258
  """Check the sanity of iallocator and node arguments and use the
1259
  cluster-wide iallocator if appropriate.
1260

1261
  Check that at most one of (iallocator, node) is specified. If none is
1262
  specified, then the LU's opcode's iallocator slot is filled with the
1263
  cluster-wide default iallocator.
1264

1265
  @type iallocator_slot: string
1266
  @param iallocator_slot: the name of the opcode iallocator slot
1267
  @type node_slot: string
1268
  @param node_slot: the name of the opcode target node slot
1269

1270
  """
1271
  node = getattr(lu.op, node_slot, None)
1272
  iallocator = getattr(lu.op, iallocator_slot, None)
1273

    
1274
  if node is not None and iallocator is not None:
1275
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1276
                               errors.ECODE_INVAL)
1277
  elif node is None and iallocator is None:
1278
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1279
    if default_iallocator:
1280
      setattr(lu.op, iallocator_slot, default_iallocator)
1281
    else:
1282
      raise errors.OpPrereqError("No iallocator or node given and no"
1283
                                 " cluster-wide default iallocator found;"
1284
                                 " please specify either an iallocator or a"
1285
                                 " node, or set a cluster-wide default"
1286
                                 " iallocator")
1287

    
1288

    
1289
def _GetDefaultIAllocator(cfg, iallocator):
1290
  """Decides on which iallocator to use.
1291

1292
  @type cfg: L{config.ConfigWriter}
1293
  @param cfg: Cluster configuration object
1294
  @type iallocator: string or None
1295
  @param iallocator: Iallocator specified in opcode
1296
  @rtype: string
1297
  @return: Iallocator name
1298

1299
  """
1300
  if not iallocator:
1301
    # Use default iallocator
1302
    iallocator = cfg.GetDefaultIAllocator()
1303

    
1304
  if not iallocator:
1305
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1306
                               " opcode nor as a cluster-wide default",
1307
                               errors.ECODE_INVAL)
1308

    
1309
  return iallocator
1310

    
1311

    
1312
class LUClusterPostInit(LogicalUnit):
1313
  """Logical unit for running hooks after cluster initialization.
1314

1315
  """
1316
  HPATH = "cluster-init"
1317
  HTYPE = constants.HTYPE_CLUSTER
1318

    
1319
  def BuildHooksEnv(self):
1320
    """Build hooks env.
1321

1322
    """
1323
    return {
1324
      "OP_TARGET": self.cfg.GetClusterName(),
1325
      }
1326

    
1327
  def BuildHooksNodes(self):
1328
    """Build hooks nodes.
1329

1330
    """
1331
    return ([], [self.cfg.GetMasterNode()])
1332

    
1333
  def Exec(self, feedback_fn):
1334
    """Nothing to do.
1335

1336
    """
1337
    return True
1338

    
1339

    
1340
class LUClusterDestroy(LogicalUnit):
1341
  """Logical unit for destroying the cluster.
1342

1343
  """
1344
  HPATH = "cluster-destroy"
1345
  HTYPE = constants.HTYPE_CLUSTER
1346

    
1347
  def BuildHooksEnv(self):
1348
    """Build hooks env.
1349

1350
    """
1351
    return {
1352
      "OP_TARGET": self.cfg.GetClusterName(),
1353
      }
1354

    
1355
  def BuildHooksNodes(self):
1356
    """Build hooks nodes.
1357

1358
    """
1359
    return ([], [])
1360

    
1361
  def CheckPrereq(self):
1362
    """Check prerequisites.
1363

1364
    This checks whether the cluster is empty.
1365

1366
    Any errors are signaled by raising errors.OpPrereqError.
1367

1368
    """
1369
    master = self.cfg.GetMasterNode()
1370

    
1371
    nodelist = self.cfg.GetNodeList()
1372
    if len(nodelist) != 1 or nodelist[0] != master:
1373
      raise errors.OpPrereqError("There are still %d node(s) in"
1374
                                 " this cluster." % (len(nodelist) - 1),
1375
                                 errors.ECODE_INVAL)
1376
    instancelist = self.cfg.GetInstanceList()
1377
    if instancelist:
1378
      raise errors.OpPrereqError("There are still %d instance(s) in"
1379
                                 " this cluster." % len(instancelist),
1380
                                 errors.ECODE_INVAL)
1381

    
1382
  def Exec(self, feedback_fn):
1383
    """Destroys the cluster.
1384

1385
    """
1386
    master_params = self.cfg.GetMasterNetworkParameters()
1387

    
1388
    # Run post hooks on master node before it's removed
1389
    _RunPostHook(self, master_params.name)
1390

    
1391
    ems = self.cfg.GetUseExternalMipScript()
1392
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1393
                                                     master_params, ems)
1394
    result.Raise("Could not disable the master role")
1395

    
1396
    return master_params.name
1397

    
1398

    
1399
def _VerifyCertificate(filename):
1400
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1401

1402
  @type filename: string
1403
  @param filename: Path to PEM file
1404

1405
  """
1406
  try:
1407
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1408
                                           utils.ReadFile(filename))
1409
  except Exception, err: # pylint: disable=W0703
1410
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1411
            "Failed to load X509 certificate %s: %s" % (filename, err))
1412

    
1413
  (errcode, msg) = \
1414
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1415
                                constants.SSL_CERT_EXPIRATION_ERROR)
1416

    
1417
  if msg:
1418
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1419
  else:
1420
    fnamemsg = None
1421

    
1422
  if errcode is None:
1423
    return (None, fnamemsg)
1424
  elif errcode == utils.CERT_WARNING:
1425
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1426
  elif errcode == utils.CERT_ERROR:
1427
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1428

    
1429
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1430

    
1431

    
1432
def _GetAllHypervisorParameters(cluster, instances):
1433
  """Compute the set of all hypervisor parameters.
1434

1435
  @type cluster: L{objects.Cluster}
1436
  @param cluster: the cluster object
1437
  @param instances: list of L{objects.Instance}
1438
  @param instances: additional instances from which to obtain parameters
1439
  @rtype: list of (origin, hypervisor, parameters)
1440
  @return: a list with all parameters found, indicating the hypervisor they
1441
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1442

1443
  """
1444
  hvp_data = []
1445

    
1446
  for hv_name in cluster.enabled_hypervisors:
1447
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1448

    
1449
  for os_name, os_hvp in cluster.os_hvp.items():
1450
    for hv_name, hv_params in os_hvp.items():
1451
      if hv_params:
1452
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1453
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1454

    
1455
  # TODO: collapse identical parameter values in a single one
1456
  for instance in instances:
1457
    if instance.hvparams:
1458
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1459
                       cluster.FillHV(instance)))
1460

    
1461
  return hvp_data
1462

    
1463

    
1464
class _VerifyErrors(object):
1465
  """Mix-in for cluster/group verify LUs.
1466

1467
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1468
  self.op and self._feedback_fn to be available.)
1469

1470
  """
1471

    
1472
  ETYPE_FIELD = "code"
1473
  ETYPE_ERROR = "ERROR"
1474
  ETYPE_WARNING = "WARNING"
1475

    
1476
  def _Error(self, ecode, item, msg, *args, **kwargs):
1477
    """Format an error message.
1478

1479
    Based on the opcode's error_codes parameter, either format a
1480
    parseable error code, or a simpler error string.
1481

1482
    This must be called only from Exec and functions called from Exec.
1483

1484
    """
1485
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1486
    itype, etxt, _ = ecode
1487
    # first complete the msg
1488
    if args:
1489
      msg = msg % args
1490
    # then format the whole message
1491
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1492
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1493
    else:
1494
      if item:
1495
        item = " " + item
1496
      else:
1497
        item = ""
1498
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1499
    # and finally report it via the feedback_fn
1500
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1501

    
1502
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1503
    """Log an error message if the passed condition is True.
1504

1505
    """
1506
    cond = (bool(cond)
1507
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1508

    
1509
    # If the error code is in the list of ignored errors, demote the error to a
1510
    # warning
1511
    (_, etxt, _) = ecode
1512
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1513
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1514

    
1515
    if cond:
1516
      self._Error(ecode, *args, **kwargs)
1517

    
1518
    # do not mark the operation as failed for WARN cases only
1519
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1520
      self.bad = self.bad or cond
1521

    
1522

    
1523
class LUClusterVerify(NoHooksLU):
1524
  """Submits all jobs necessary to verify the cluster.
1525

1526
  """
1527
  REQ_BGL = False
1528

    
1529
  def ExpandNames(self):
1530
    self.needed_locks = {}
1531

    
1532
  def Exec(self, feedback_fn):
1533
    jobs = []
1534

    
1535
    if self.op.group_name:
1536
      groups = [self.op.group_name]
1537
      depends_fn = lambda: None
1538
    else:
1539
      groups = self.cfg.GetNodeGroupList()
1540

    
1541
      # Verify global configuration
1542
      jobs.append([
1543
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1544
        ])
1545

    
1546
      # Always depend on global verification
1547
      depends_fn = lambda: [(-len(jobs), [])]
1548

    
1549
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

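    # Illustrative example (skew value invented for this note): with a
    # maximum allowed skew of 150s, a node whose merged time is 200s behind
    # nvinfo_starttime gets ntime_diff of "200.0s" and is reported via
    # CV_ENODETIME below.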
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
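      # Illustrative example (numbers invented for this note): if this node is
      # secondary for two auto-balanced 2048 MiB instances whose primary is
      # "nodeB", needed_mem for "nodeB" below is 4096 MiB and an ENODEN1 error
      # is reported when the node's reported free memory is smaller than that.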
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

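    # Explanatory note (not original code): nodefiles built below maps each
    # filename to the frozenset of node names expected to hold it, while
    # fileinfo ends up as {filename: {checksum: set(node names)}}, which is
    # what the per-file consistency checks further down operate on.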
    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

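    # Illustrative example (node names invented for this note): for a group
    # containing node1/node2 with one other group containing node3/node4, the
    # return value is roughly
    #   (["node1", "node2"], {"node1": ["node3"], "node2": ["node4"]}),
    # i.e. each online node gets one rotating contact per other group.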
    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; a hook failure makes
    its output be logged in the verify output and causes the verification to
    fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          # FIXME: investigate best way to handle offline insts
          if inst.admin_state == constants.ADMINST_OFFLINE:
            if verbose:
              feedback_fn("* Skipping offline instance %s" % inst.name)
            i_offline += 1
            continue
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2939
               " primary node failed", instance)
2940

    
2941
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2942
               pnode_img.offline,
2943
               constants.CV_EINSTANCEBADNODE, instance,
2944
               "instance is marked as running and lives on offline node %s",
2945
               inst_config.primary_node)
2946

    
2947
      # If the instance is non-redundant we cannot survive losing its primary
2948
      # node, so we are not N+1 compliant. On the other hand we have no disk
2949
      # templates with more than one secondary so that situation is not well
2950
      # supported either.
2951
      # FIXME: does not support file-backed instances
2952
      if not inst_config.secondary_nodes:
2953
        i_non_redundant.append(instance)
2954

    
2955
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2956
               constants.CV_EINSTANCELAYOUT,
2957
               instance, "instance has multiple secondary nodes: %s",
2958
               utils.CommaJoin(inst_config.secondary_nodes),
2959
               code=self.ETYPE_WARNING)
2960

    
2961
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2962
        pnode = inst_config.primary_node
2963
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2964
        instance_groups = {}
2965

    
2966
        for node in instance_nodes:
2967
          instance_groups.setdefault(self.all_node_info[node].group,
2968
                                     []).append(node)
2969

    
2970
        pretty_list = [
2971
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2972
          # Sort so that we always list the primary node first.
2973
          for group, nodes in sorted(instance_groups.items(),
2974
                                     key=lambda (_, nodes): pnode in nodes,
2975
                                     reverse=True)]
2976

    
2977
        self._ErrorIf(len(instance_groups) > 1,
2978
                      constants.CV_EINSTANCESPLITGROUPS,
2979
                      instance, "instance has primary and secondary nodes in"
2980
                      " different groups: %s", utils.CommaJoin(pretty_list),
2981
                      code=self.ETYPE_WARNING)
2982

    
2983
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2984
        i_non_a_balanced.append(instance)
2985

    
2986
      for snode in inst_config.secondary_nodes:
2987
        s_img = node_image[snode]
2988
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2989
                 snode, "instance %s, connection to secondary node failed",
2990
                 instance)
2991

    
2992
        if s_img.offline:
2993
          inst_nodes_offline.append(snode)
2994

    
2995
      # warn that the instance lives on offline nodes
2996
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2997
               "instance has offline secondary node(s) %s",
2998
               utils.CommaJoin(inst_nodes_offline))
2999
      # ... or ghost/non-vm_capable nodes
3000
      for node in inst_config.all_nodes:
3001
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3002
                 instance, "instance lives on ghost node %s", node)
3003
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3004
                 instance, "instance lives on non-vm_capable node %s", node)
3005

    
3006
    feedback_fn("* Verifying orphan volumes")
3007
    reserved = utils.FieldSet(*cluster.reserved_lvs)
3008

    
3009
    # We will get spurious "unknown volume" warnings if any node of this group
3010
    # is secondary for an instance whose primary is in another group. To avoid
3011
    # them, we find these instances and add their volumes to node_vol_should.
3012
    for inst in self.all_inst_info.values():
3013
      for secondary in inst.secondary_nodes:
3014
        if (secondary in self.my_node_info
3015
            and inst.name not in self.my_inst_info):
3016
          inst.MapLVsByNode(node_vol_should)
3017
          break
3018

    
3019
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3020

    
3021
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3022
      feedback_fn("* Verifying N+1 Memory redundancy")
3023
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3024

    
3025
    feedback_fn("* Other Notes")
3026
    if i_non_redundant:
3027
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3028
                  % len(i_non_redundant))
3029

    
3030
    if i_non_a_balanced:
3031
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3032
                  % len(i_non_a_balanced))
3033

    
3034
    if i_offline:
3035
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3036

    
3037
    if n_offline:
3038
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3039

    
3040
    if n_drained:
3041
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3042

    
3043
    return not self.bad
3044

    
3045
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
3046
    """Analyze the post-hooks' result
3047

3048
    This method analyses the hook result, handles it, and sends some
3049
    nicely-formatted feedback back to the user.
3050

3051
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
3052
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
3053
    @param hooks_results: the results of the multi-node hooks rpc call
3054
    @param feedback_fn: function used send feedback back to the caller
3055
    @param lu_result: previous Exec result
3056
    @return: the new Exec result, based on the previous result
3057
        and hook results
3058

3059
    """
3060
    # We only really run POST phase hooks, only for non-empty groups,
3061
    # and are only interested in their results
3062
    if not self.my_node_names:
3063
      # empty node group
3064
      pass
3065
    elif phase == constants.HOOKS_PHASE_POST:
3066
      # Used to change hooks' output to proper indentation
3067
      feedback_fn("* Hooks Results")
3068
      assert hooks_results, "invalid result from hooks"
3069

    
3070
      for node_name in hooks_results:
3071
        res = hooks_results[node_name]
3072
        msg = res.fail_msg
3073
        test = msg and not res.offline
3074
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3075
                      "Communication failure in hooks execution: %s", msg)
3076
        if res.offline or msg:
3077
          # No need to investigate payload if node is offline or gave
3078
          # an error.
3079
          continue
3080
        for script, hkr, output in res.payload:
3081
          test = hkr == constants.HKR_FAIL
3082
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
3083
                        "Script %s failed, output:", script)
3084
          if test:
3085
            output = self._HOOKS_INDENT_RE.sub("      ", output)
3086
            feedback_fn("%s" % output)
3087
            lu_result = False
3088

    
3089
    return lu_result
3090

    
3091

    
3092
class LUClusterVerifyDisks(NoHooksLU):
3093
  """Verifies the cluster disks status.
3094

3095
  """
3096
  REQ_BGL = False
3097

    
3098
  def ExpandNames(self):
3099
    self.share_locks = _ShareAll()
3100
    self.needed_locks = {
3101
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
3102
      }
3103

    
3104
  def Exec(self, feedback_fn):
3105
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)
3106

    
3107
    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
3108
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
3109
                           for group in group_names])
3110

    
3111

    
3112
class LUGroupVerifyDisks(NoHooksLU):
3113
  """Verifies the status of all disks in a node group.
3114

3115
  """
3116
  REQ_BGL = False
3117

    
3118
  def ExpandNames(self):
3119
    # Raises errors.OpPrereqError on its own if group can't be found
3120
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
3121

    
3122
    self.share_locks = _ShareAll()
3123
    self.needed_locks = {
3124
      locking.LEVEL_INSTANCE: [],
3125
      locking.LEVEL_NODEGROUP: [],
3126
      locking.LEVEL_NODE: [],
3127
      }
3128

    
3129
  def DeclareLocks(self, level):
3130
    if level == locking.LEVEL_INSTANCE:
3131
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
3132

    
3133
      # Lock instances optimistically, needs verification once node and group
3134
      # locks have been acquired
3135
      self.needed_locks[locking.LEVEL_INSTANCE] = \
3136
        self.cfg.GetNodeGroupInstances(self.group_uuid)
3137

    
3138
    elif level == locking.LEVEL_NODEGROUP:
3139
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
3140

    
3141
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
3142
        set([self.group_uuid] +
3143
            # Lock all groups used by instances optimistically; this requires
3144
            # going via the node before it's locked, requiring verification
3145
            # later on
3146
            [group_uuid
3147
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
3148
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])
3149

    
3150
    elif level == locking.LEVEL_NODE:
3151
      # This will only lock the nodes in the group to be verified which contain
3152
      # actual instances
3153
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
3154
      self._LockInstancesNodes()
3155

    
3156
      # Lock all nodes in group to be verified
3157
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
3158
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
3159
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
3160

    
3161
  def CheckPrereq(self):
3162
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
3163
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
3164
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
3165

    
3166
    assert self.group_uuid in owned_groups
3167

    
3168
    # Check if locked instances are still correct
3169
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
3170

    
3171
    # Get instance information
3172
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
3173

    
3174
    # Check if node groups for locked instances are still correct
3175
    for (instance_name, inst) in self.instances.items():
3176
      assert owned_nodes.issuperset(inst.all_nodes), \
3177
        "Instance %s's nodes changed while we kept the lock" % instance_name
3178

    
3179
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
3180
                                             owned_groups)
3181

    
3182
      assert self.group_uuid in inst_groups, \
3183
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
3184

    
3185
  def Exec(self, feedback_fn):
3186
    """Verify integrity of cluster disks.
3187

3188
    @rtype: tuple of three items
3189
    @return: a tuple of (dict of node-to-node_error, list of instances
3190
        which need activate-disks, dict of instance: (node, volume) for
3191
        missing volumes
3192

3193
    """
3194
    res_nodes = {}
3195
    res_instances = set()
3196
    res_missing = {}
3197

    
3198
    nv_dict = _MapInstanceDisksToNodes([inst
3199
            for inst in self.instances.values()
3200
            if inst.admin_state == constants.ADMINST_UP])
3201

    
3202
    if nv_dict:
3203
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
3204
                             set(self.cfg.GetVmCapableNodeList()))
3205

    
3206
      node_lvs = self.rpc.call_lv_list(nodes, [])
3207

    
3208
      for (node, node_res) in node_lvs.items():
3209
        if node_res.offline:
3210
          continue
3211

    
3212
        msg = node_res.fail_msg
3213
        if msg:
3214
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3215
          res_nodes[node] = msg
3216
          continue
3217

    
3218
        for lv_name, (_, _, lv_online) in node_res.payload.items():
3219
          inst = nv_dict.pop((node, lv_name), None)
3220
          if not (lv_online or inst is None):
3221
            res_instances.add(inst)
3222

    
3223
      # any leftover items in nv_dict are missing LVs, let's arrange the data
3224
      # better
3225
      for key, inst in nv_dict.iteritems():
3226
        res_missing.setdefault(inst, []).append(list(key))
3227

    
3228
    return (res_nodes, list(res_instances), res_missing)
3229

    
3230

    
3231
class LUClusterRepairDiskSizes(NoHooksLU):
3232
  """Verifies the cluster disks sizes.
3233

3234
  """
3235
  REQ_BGL = False
3236

    
3237
  def ExpandNames(self):
3238
    if self.op.instances:
3239
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
3240
      self.needed_locks = {
3241
        locking.LEVEL_NODE_RES: [],
3242
        locking.LEVEL_INSTANCE: self.wanted_names,
3243
        }
3244
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
3245
    else:
3246
      self.wanted_names = None
3247
      self.needed_locks = {
3248
        locking.LEVEL_NODE_RES: locking.ALL_SET,
3249
        locking.LEVEL_INSTANCE: locking.ALL_SET,
3250
        }
3251
    self.share_locks = {
3252
      locking.LEVEL_NODE_RES: 1,
3253
      locking.LEVEL_INSTANCE: 0,
3254
      }
3255

    
3256
  def DeclareLocks(self, level):
3257
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
3258
      self._LockInstancesNodes(primary_only=True, level=level)
3259

    
3260
  def CheckPrereq(self):
3261
    """Check prerequisites.
3262

3263
    This only checks the optional instance list against the existing names.
3264

3265
    """
3266
    if self.wanted_names is None:
3267
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
3268

    
3269
    self.wanted_instances = \
3270
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3271

    
3272
  def _EnsureChildSizes(self, disk):
3273
    """Ensure children of the disk have the needed disk size.
3274

3275
    This is valid mainly for DRBD8 and fixes an issue where the
3276
    children have smaller disk size.
3277

3278
    @param disk: an L{ganeti.objects.Disk} object
3279

3280
    """
3281
    if disk.dev_type == constants.LD_DRBD8:
3282
      assert disk.children, "Empty children for DRBD8?"
3283
      fchild = disk.children[0]
3284
      mismatch = fchild.size < disk.size
3285
      if mismatch:
3286
        self.LogInfo("Child disk has size %d, parent %d, fixing",
3287
                     fchild.size, disk.size)
3288
        fchild.size = disk.size
3289

    
3290
      # and we recurse on this child only, not on the metadev
3291
      return self._EnsureChildSizes(fchild) or mismatch
3292
    else:
3293
      return False
3294

    
3295
  def Exec(self, feedback_fn):
3296
    """Verify the size of cluster disks.
3297

3298
    """
3299
    # TODO: check child disks too
3300
    # TODO: check differences in size between primary/secondary nodes
3301
    per_node_disks = {}
3302
    for instance in self.wanted_instances:
3303
      pnode = instance.primary_node
3304
      if pnode not in per_node_disks:
3305
        per_node_disks[pnode] = []
3306
      for idx, disk in enumerate(instance.disks):
3307
        per_node_disks[pnode].append((instance, idx, disk))
3308

    
3309
    assert not (frozenset(per_node_disks.keys()) -
3310
                self.owned_locks(locking.LEVEL_NODE_RES)), \
3311
      "Not owning correct locks"
3312
    assert not self.owned_locks(locking.LEVEL_NODE)
3313

    
3314
    changed = []
3315
    for node, dskl in per_node_disks.items():
3316
      newl = [v[2].Copy() for v in dskl]
3317
      for dsk in newl:
3318
        self.cfg.SetDiskID(dsk, node)
3319
      result = self.rpc.call_blockdev_getsize(node, newl)
3320
      if result.fail_msg:
3321
        self.LogWarning("Failure in blockdev_getsize call to node"
3322
                        " %s, ignoring", node)
3323
        continue
3324
      if len(result.payload) != len(dskl):
3325
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
3326
                        " result.payload=%s", node, len(dskl), result.payload)
3327
        self.LogWarning("Invalid result from node %s, ignoring node results",
3328
                        node)
3329
        continue
3330
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
3331
        if size is None:
3332
          self.LogWarning("Disk %d of instance %s did not return size"
3333
                          " information, ignoring", idx, instance.name)
3334
          continue
3335
        if not isinstance(size, (int, long)):
3336
          self.LogWarning("Disk %d of instance %s did not return valid"
3337
                          " size information, ignoring", idx, instance.name)
3338
          continue
3339
        size = size >> 20
3340
        if size != disk.size:
3341
          self.LogInfo("Disk %d of instance %s has mismatched size,"
3342
                       " correcting: recorded %d, actual %d", idx,
3343
                       instance.name, disk.size, size)
3344
          disk.size = size
3345
          self.cfg.Update(instance, feedback_fn)
3346
          changed.append((instance.name, idx, size))
3347
        if self._EnsureChildSizes(disk):
3348
          self.cfg.Update(instance, feedback_fn)
3349
          changed.append((instance.name, idx, disk.size))
3350
    return changed
3351

    
3352

    
3353
class LUClusterRename(LogicalUnit):
3354
  """Rename the cluster.
3355

3356
  """
3357
  HPATH = "cluster-rename"
3358
  HTYPE = constants.HTYPE_CLUSTER
3359

    
3360
  def BuildHooksEnv(self):
3361
    """Build hooks env.
3362

3363
    """
3364
    return {
3365
      "OP_TARGET": self.cfg.GetClusterName(),
3366
      "NEW_NAME": self.op.name,
3367
      }
3368

    
3369
  def BuildHooksNodes(self):
3370
    """Build hooks nodes.
3371

3372
    """
3373
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3374

    
3375
  def CheckPrereq(self):
3376
    """Verify that the passed name is a valid one.
3377

3378
    """
3379
    hostname = netutils.GetHostname(name=self.op.name,
3380
                                    family=self.cfg.GetPrimaryIPFamily())
3381

    
3382
    new_name = hostname.name
3383
    self.ip = new_ip = hostname.ip
3384
    old_name = self.cfg.GetClusterName()
3385
    old_ip = self.cfg.GetMasterIP()
3386
    if new_name == old_name and new_ip == old_ip:
3387
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3388
                                 " cluster has changed",
3389
                                 errors.ECODE_INVAL)
3390
    if new_ip != old_ip:
3391
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3392
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3393
                                   " reachable on the network" %
3394
                                   new_ip, errors.ECODE_NOTUNIQUE)
3395

    
3396
    self.op.name = new_name
3397

    
3398
  def Exec(self, feedback_fn):
3399
    """Rename the cluster.
3400

3401
    """
3402
    clustername = self.op.name
3403
    new_ip = self.ip
3404

    
3405
    # shutdown the master IP
3406
    master_params = self.cfg.GetMasterNetworkParameters()
3407
    ems = self.cfg.GetUseExternalMipScript()
3408
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3409
                                                     master_params, ems)
3410
    result.Raise("Could not disable the master role")
3411

    
3412
    try:
3413
      cluster = self.cfg.GetClusterInfo()
3414
      cluster.cluster_name = clustername
3415
      cluster.master_ip = new_ip
3416
      self.cfg.Update(cluster, feedback_fn)
3417

    
3418
      # update the known hosts file
3419
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3420
      node_list = self.cfg.GetOnlineNodeList()
3421
      try:
3422
        node_list.remove(master_params.name)
3423
      except ValueError:
3424
        pass
3425
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3426
    finally:
3427
      master_params.ip = new_ip
3428
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3429
                                                     master_params, ems)
3430
      msg = result.fail_msg
3431
      if msg:
3432
        self.LogWarning("Could not re-enable the master role on"
3433
                        " the master, please restart manually: %s", msg)
3434

    
3435
    return clustername
3436

    
3437

    
3438
def _ValidateNetmask(cfg, netmask):
3439
  """Checks if a netmask is valid.
3440

3441
  @type cfg: L{config.ConfigWriter}
3442
  @param cfg: The cluster configuration
3443
  @type netmask: int
3444
  @param netmask: the netmask to be verified
3445
  @raise errors.OpPrereqError: if the validation fails
3446

3447
  """
3448
  ip_family = cfg.GetPrimaryIPFamily()
3449
  try:
3450
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
3451
  except errors.ProgrammerError:
3452
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
3453
                               ip_family)
3454
  if not ipcls.ValidateNetmask(netmask):
3455
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
3456
                                (netmask))
3457

    
3458

    
3459
class LUClusterSetParams(LogicalUnit):
3460
  """Change the parameters of the cluster.
3461

3462
  """
3463
  HPATH = "cluster-modify"
3464
  HTYPE = constants.HTYPE_CLUSTER
3465
  REQ_BGL = False
3466

    
3467
  def CheckArguments(self):
3468
    """Check parameters
3469

3470
    """
3471
    if self.op.uid_pool:
3472
      uidpool.CheckUidPool(self.op.uid_pool)
3473

    
3474
    if self.op.add_uids:
3475
      uidpool.CheckUidPool(self.op.add_uids)
3476

    
3477
    if self.op.remove_uids:
3478
      uidpool.CheckUidPool(self.op.remove_uids)
3479

    
3480
    if self.op.master_netmask is not None:
3481
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3482

    
3483
  def ExpandNames(self):
3484
    # FIXME: in the future maybe other cluster params won't require checking on
3485
    # all nodes to be modified.
3486
    self.needed_locks = {
3487
      locking.LEVEL_NODE: locking.ALL_SET,
3488
    }
3489
    self.share_locks[locking.LEVEL_NODE] = 1
3490

    
3491
  def BuildHooksEnv(self):
3492
    """Build hooks env.
3493

3494
    """
3495
    return {
3496
      "OP_TARGET": self.cfg.GetClusterName(),
3497
      "NEW_VG_NAME": self.op.vg_name,
3498
      }
3499

    
3500
  def BuildHooksNodes(self):
3501
    """Build hooks nodes.
3502

3503
    """
3504
    mn = self.cfg.GetMasterNode()
3505
    return ([mn], [mn])
3506

    
3507
  def CheckPrereq(self):
3508
    """Check prerequisites.
3509

3510
    This checks whether the given params don't conflict and
3511
    if the given volume group is valid.
3512

3513
    """
3514
    if self.op.vg_name is not None and not self.op.vg_name:
3515
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3516
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3517
                                   " instances exist", errors.ECODE_INVAL)
3518

    
3519
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3520
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3521
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3522
                                   " drbd-based instances exist",
3523
                                   errors.ECODE_INVAL)
3524

    
3525
    node_list = self.owned_locks(locking.LEVEL_NODE)
3526

    
3527
    # if vg_name not None, checks given volume group on all nodes
3528
    if self.op.vg_name:
3529
      vglist = self.rpc.call_vg_list(node_list)
3530
      for node in node_list:
3531
        msg = vglist[node].fail_msg
3532
        if msg:
3533
          # ignoring down node
3534
          self.LogWarning("Error while gathering data on node %s"
3535
                          " (ignoring node): %s", node, msg)
3536
          continue
3537
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3538
                                              self.op.vg_name,
3539
                                              constants.MIN_VG_SIZE)
3540
        if vgstatus:
3541
          raise errors.OpPrereqError("Error on node '%s': %s" %
3542
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3543

    
3544
    if self.op.drbd_helper:
3545
      # checks given drbd helper on all nodes
3546
      helpers = self.rpc.call_drbd_helper(node_list)
3547
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3548
        if ninfo.offline:
3549
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3550
          continue
3551
        msg = helpers[node].fail_msg
3552
        if msg:
3553
          raise errors.OpPrereqError("Error checking drbd helper on node"
3554
                                     " '%s': %s" % (node, msg),
3555
                                     errors.ECODE_ENVIRON)
3556
        node_helper = helpers[node].payload
3557
        if node_helper != self.op.drbd_helper:
3558
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3559
                                     (node, node_helper), errors.ECODE_ENVIRON)
3560

    
3561
    self.cluster = cluster = self.cfg.GetClusterInfo()
3562
    # validate params changes
3563
    if self.op.beparams:
3564
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3565
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3566

    
3567
    if self.op.ndparams:
3568
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3569
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3570

    
3571
      # TODO: we need a more general way to handle resetting
3572
      # cluster-level parameters to default values
3573
      if self.new_ndparams["oob_program"] == "":
3574
        self.new_ndparams["oob_program"] = \
3575
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3576

    
3577
    if self.op.nicparams:
3578
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3579
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3580
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3581
      nic_errors = []
3582

    
3583
      # check all instances for consistency
3584
      for instance in self.cfg.GetAllInstancesInfo().values():
3585
        for nic_idx, nic in enumerate(instance.nics):
3586
          params_copy = copy.deepcopy(nic.nicparams)
3587
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3588

    
3589
          # check parameter syntax
3590
          try:
3591
            objects.NIC.CheckParameterSyntax(params_filled)
3592
          except errors.ConfigurationError, err:
3593
            nic_errors.append("Instance %s, nic/%d: %s" %
3594
                              (instance.name, nic_idx, err))
3595

    
3596
          # if we're moving instances to routed, check that they have an ip
3597
          target_mode = params_filled[constants.NIC_MODE]
3598
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3599
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3600
                              " address" % (instance.name, nic_idx))
3601
      if nic_errors:
3602
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3603
                                   "\n".join(nic_errors))
3604

    
3605
    # hypervisor list/parameters
3606
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3607
    if self.op.hvparams:
3608
      for hv_name, hv_dict in self.op.hvparams.items():
3609
        if hv_name not in self.new_hvparams:
3610
          self.new_hvparams[hv_name] = hv_dict
3611
        else:
3612
          self.new_hvparams[hv_name].update(hv_dict)
3613

    
3614
    # os hypervisor parameters
3615
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3616
    if self.op.os_hvp:
3617
      for os_name, hvs in self.op.os_hvp.items():
3618
        if os_name not in self.new_os_hvp:
3619
          self.new_os_hvp[os_name] = hvs
3620
        else:
3621
          for hv_name, hv_dict in hvs.items():
3622
            if hv_name not in self.new_os_hvp[os_name]:
3623
              self.new_os_hvp[os_name][hv_name] = hv_dict
3624
            else:
3625
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3626

    
3627
    # os parameters
3628
    self.new_osp = objects.FillDict(cluster.osparams, {})
3629
    if self.op.osparams:
3630
      for os_name, osp in self.op.osparams.items():
3631
        if os_name not in self.new_osp:
3632
          self.new_osp[os_name] = {}
3633

    
3634
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3635
                                                  use_none=True)
3636

    
3637
        if not self.new_osp[os_name]:
3638
          # we removed all parameters
3639
          del self.new_osp[os_name]
3640
        else:
3641
          # check the parameter validity (remote check)
3642
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3643
                         os_name, self.new_osp[os_name])
3644

    
3645
    # changes to the hypervisor list
3646
    if self.op.enabled_hypervisors is not None:
3647
      self.hv_list = self.op.enabled_hypervisors
3648
      for hv in self.hv_list:
3649
        # if the hypervisor doesn't already exist in the cluster
3650
        # hvparams, we initialize it to empty, and then (in both
3651
        # cases) we make sure to fill the defaults, as we might not
3652
        # have a complete defaults list if the hypervisor wasn't
3653
        # enabled before
3654
        if hv not in new_hvp:
3655
          new_hvp[hv] = {}
3656
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3657
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3658
    else:
3659
      self.hv_list = cluster.enabled_hypervisors
3660

    
3661
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3662
      # either the enabled list has changed, or the parameters have, validate
3663
      for hv_name, hv_params in self.new_hvparams.items():
3664
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3665
            (self.op.enabled_hypervisors and
3666
             hv_name in self.op.enabled_hypervisors)):
3667
          # either this is a new hypervisor, or its parameters have changed
3668
          hv_class = hypervisor.GetHypervisor(hv_name)
3669
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3670
          hv_class.CheckParameterSyntax(hv_params)
3671
          _CheckHVParams(self, node_list, hv_name, hv_params)
3672

    
3673
    if self.op.os_hvp:
3674
      # no need to check any newly-enabled hypervisors, since the
3675
      # defaults have already been checked in the above code-block
3676
      for os_name, os_hvp in self.new_os_hvp.items():
3677
        for hv_name, hv_params in os_hvp.items():
3678
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3679
          # we need to fill in the new os_hvp on top of the actual hv_p
3680
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3681
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3682
          hv_class = hypervisor.GetHypervisor(hv_name)
3683
          hv_class.CheckParameterSyntax(new_osp)
3684
          _CheckHVParams(self, node_list, hv_name, new_osp)
3685

    
3686
    if self.op.default_iallocator:
3687
      alloc_script = utils.FindFile(self.op.default_iallocator,
3688
                                    constants.IALLOCATOR_SEARCH_PATH,
3689
                                    os.path.isfile)
3690
      if alloc_script is None:
3691
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3692
                                   " specified" % self.op.default_iallocator,
3693
                                   errors.ECODE_INVAL)
3694

    
3695
  def Exec(self, feedback_fn):
3696
    """Change the parameters of the cluster.
3697

3698
    """
3699
    if self.op.vg_name is not None:
3700
      new_volume = self.op.vg_name
3701
      if not new_volume:
3702
        new_volume = None
3703
      if new_volume != self.cfg.GetVGName():
3704
        self.cfg.SetVGName(new_volume)
3705
      else:
3706
        feedback_fn("Cluster LVM configuration already in desired"
3707
                    " state, not changing")
3708
    if self.op.drbd_helper is not None:
3709
      new_helper = self.op.drbd_helper
3710
      if not new_helper:
3711
        new_helper = None
3712
      if new_helper != self.cfg.GetDRBDHelper():
3713
        self.cfg.SetDRBDHelper(new_helper)
3714
      else:
3715
        feedback_fn("Cluster DRBD helper already in desired state,"
3716
                    " not changing")
3717
    if self.op.hvparams:
3718
      self.cluster.hvparams = self.new_hvparams
3719
    if self.op.os_hvp:
3720
      self.cluster.os_hvp = self.new_os_hvp
3721
    if self.op.enabled_hypervisors is not None:
3722
      self.cluster.hvparams = self.new_hvparams
3723
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3724
    if self.op.beparams:
3725
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3726
    if self.op.nicparams:
3727
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3728
    if self.op.osparams:
3729
      self.cluster.osparams = self.new_osp
3730
    if self.op.ndparams:
3731
      self.cluster.ndparams = self.new_ndparams
3732

    
3733
    if self.op.candidate_pool_size is not None:
3734
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3735
      # we need to update the pool size here, otherwise the save will fail
3736
      _AdjustCandidatePool(self, [])
3737

    
3738
    if self.op.maintain_node_health is not None:
3739
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3740
        feedback_fn("Note: CONFD was disabled at build time, node health"
3741
                    " maintenance is not useful (still enabling it)")
3742
      self.cluster.maintain_node_health = self.op.maintain_node_health
3743

    
3744
    if self.op.prealloc_wipe_disks is not None:
3745
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3746

    
3747
    if self.op.add_uids is not None:
3748
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3749

    
3750
    if self.op.remove_uids is not None:
3751
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3752

    
3753
    if self.op.uid_pool is not None:
3754
      self.cluster.uid_pool = self.op.uid_pool
3755

    
3756
    if self.op.default_iallocator is not None:
3757
      self.cluster.default_iallocator = self.op.default_iallocator
3758

    
3759
    if self.op.reserved_lvs is not None:
3760
      self.cluster.reserved_lvs = self.op.reserved_lvs
3761

    
3762
    if self.op.use_external_mip_script is not None:
3763
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3764

    
3765
    def helper_os(aname, mods, desc):
3766
      desc += " OS list"
3767
      lst = getattr(self.cluster, aname)
3768
      for key, val in mods:
3769
        if key == constants.DDM_ADD:
3770
          if val in lst:
3771
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3772
          else:
3773
            lst.append(val)
3774
        elif key == constants.DDM_REMOVE:
3775
          if val in lst:
3776
            lst.remove(val)
3777
          else:
3778
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3779
        else:
3780
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3781

    
3782
    if self.op.hidden_os:
3783
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3784

    
3785
    if self.op.blacklisted_os:
3786
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3787

    
3788
    if self.op.master_netdev:
3789
      master_params = self.cfg.GetMasterNetworkParameters()
3790
      ems = self.cfg.GetUseExternalMipScript()
3791
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3792
                  self.cluster.master_netdev)
3793
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3794
                                                       master_params, ems)
3795
      result.Raise("Could not disable the master ip")
3796
      feedback_fn("Changing master_netdev from %s to %s" %
3797
                  (master_params.netdev, self.op.master_netdev))
3798
      self.cluster.master_netdev = self.op.master_netdev
3799

    
3800
    if self.op.master_netmask:
3801
      master_params = self.cfg.GetMasterNetworkParameters()
3802
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3803
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3804
                                                        master_params.netmask,
3805
                                                        self.op.master_netmask,
3806
                                                        master_params.ip,
3807
                                                        master_params.netdev)
3808
      if result.fail_msg:
3809
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3810
        feedback_fn(msg)
3811

    
3812
      self.cluster.master_netmask = self.op.master_netmask
3813

    
3814
    self.cfg.Update(self.cluster, feedback_fn)
3815

    
3816
    if self.op.master_netdev:
3817
      master_params = self.cfg.GetMasterNetworkParameters()
3818
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3819
                  self.op.master_netdev)
3820
      ems = self.cfg.GetUseExternalMipScript()
3821
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3822
                                                     master_params, ems)
3823
      if result.fail_msg:
3824
        self.LogWarning("Could not re-enable the master ip on"
3825
                        " the master, please restart manually: %s",
3826
                        result.fail_msg)
3827

    
3828

    
3829
def _UploadHelper(lu, nodes, fname):
3830
  """Helper for uploading a file and showing warnings.
3831

3832
  """
3833
  if os.path.exists(fname):
3834
    result = lu.rpc.call_upload_file(nodes, fname)
3835
    for to_node, to_result in result.items():
3836
      msg = to_result.fail_msg
3837
      if msg:
3838
        msg = ("Copy of file %s to node %s failed: %s" %
3839
               (fname, to_node, msg))
3840
        lu.proc.LogWarning(msg)
3841

    
3842

    
3843
def _ComputeAncillaryFiles(cluster, redist):
3844
  """Compute files external to Ganeti which need to be consistent.
3845

3846
  @type redist: boolean
3847
  @param redist: Whether to include files which need to be redistributed
3848

3849
  """
3850
  # Compute files for all nodes
3851
  files_all = set([
3852
    constants.SSH_KNOWN_HOSTS_FILE,
3853
    constants.CONFD_HMAC_KEY,
3854
    constants.CLUSTER_DOMAIN_SECRET_FILE,
3855
    constants.SPICE_CERT_FILE,
3856
    constants.SPICE_CACERT_FILE,
3857
    constants.RAPI_USERS_FILE,
3858
    ])
3859

    
3860
  if not redist:
3861
    files_all.update(constants.ALL_CERT_FILES)
3862
    files_all.update(ssconf.SimpleStore().GetFileList())
3863
  else:
3864
    # we need to ship at least the RAPI certificate
3865
    files_all.add(constants.RAPI_CERT_FILE)
3866

    
3867
  if cluster.modify_etc_hosts:
3868
    files_all.add(constants.ETC_HOSTS)
3869

    
3870
  # Files which are optional, these must:
3871
  # - be present in one other category as well
3872
  # - either exist or not exist on all nodes of that category (mc, vm all)
3873
  files_opt = set([
3874
    constants.RAPI_USERS_FILE,
3875
    ])
3876

    
3877
  # Files which should only be on master candidates
3878
  files_mc = set()
3879

    
3880
  if not redist:
3881
    files_mc.add(constants.CLUSTER_CONF_FILE)
3882

    
3883
    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
3884
    # replication
3885
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)
3886

    
3887
  # Files which should only be on VM-capable nodes
3888
  files_vm = set(filename
3889
    for hv_name in cluster.enabled_hypervisors
3890
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])
3891

    
3892
  files_opt |= set(filename
3893
    for hv_name in cluster.enabled_hypervisors
3894
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])
3895

    
3896
  # Filenames in each category must be unique
3897
  all_files_set = files_all | files_mc | files_vm
3898
  assert (len(all_files_set) ==
3899
          sum(map(len, [files_all, files_mc, files_vm]))), \
3900
         "Found file listed in more than one file list"
3901

    
3902
  # Optional files must be present in one other category
3903
  assert all_files_set.issuperset(files_opt), \
3904
         "Optional file not in a different required list"
3905

    
3906
  return (files_all, files_opt, files_mc, files_vm)
3907

    
3908

    
3909
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3910
  """Distribute additional files which are part of the cluster configuration.
3911

3912
  ConfigWriter takes care of distributing the config and ssconf files, but
3913
  there are more files which should be distributed to all nodes. This function
3914
  makes sure those are copied.
3915

3916
  @param lu: calling logical unit
3917
  @param additional_nodes: list of nodes not in the config to distribute to
3918
  @type additional_vm: boolean
3919
  @param additional_vm: whether the additional nodes are vm-capable or not
3920

3921
  """
3922
  # Gather target nodes
3923
  cluster = lu.cfg.GetClusterInfo()
3924
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3925

    
3926
  online_nodes = lu.cfg.GetOnlineNodeList()
3927
  vm_nodes = lu.cfg.GetVmCapableNodeList()
3928

    
3929
  if additional_nodes is not None:
3930
    online_nodes.extend(additional_nodes)
3931
    if additional_vm:
3932
      vm_nodes.extend(additional_nodes)
3933

    
3934
  # Never distribute to master node
3935
  for nodelist in [online_nodes, vm_nodes]:
3936
    if master_info.name in nodelist:
3937
      nodelist.remove(master_info.name)
3938

    
3939
  # Gather file lists
3940
  (files_all, _, files_mc, files_vm) = \
3941
    _ComputeAncillaryFiles(cluster, True)
3942

    
3943
  # Never re-distribute configuration file from here
3944
  assert not (constants.CLUSTER_CONF_FILE in files_all or
3945
              constants.CLUSTER_CONF_FILE in files_vm)
3946
  assert not files_mc, "Master candidates not handled in this function"
3947

    
3948
  filemap = [
3949
    (online_nodes, files_all),
3950
    (vm_nodes, files_vm),
3951
    ]
3952

    
3953
  # Upload the files
3954
  for (node_list, files) in filemap:
3955
    for fname in files:
3956
      _UploadHelper(lu, node_list, fname)
3957

    
3958

    
3959
class LUClusterRedistConf(NoHooksLU):
3960
  """Force the redistribution of cluster configuration.
3961

3962
  This is a very simple LU.
3963

3964
  """
3965
  REQ_BGL = False
3966

    
3967
  def ExpandNames(self):
3968
    self.needed_locks = {
3969
      locking.LEVEL_NODE: locking.ALL_SET,
3970
    }
3971
    self.share_locks[locking.LEVEL_NODE] = 1
3972

    
3973
  def Exec(self, feedback_fn):
3974
    """Redistribute the configuration.
3975

3976
    """
3977
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3978
    _RedistributeAncillaryFiles(self)
3979

    
3980

    
3981
class LUClusterActivateMasterIp(NoHooksLU):
3982
  """Activate the master IP on the master node.
3983

3984
  """
3985
  def Exec(self, feedback_fn):
3986
    """Activate the master IP.
3987

3988
    """
3989
    master_params = self.cfg.GetMasterNetworkParameters()
3990
    ems = self.cfg.GetUseExternalMipScript()
3991
    result = self.rpc.call_node_activate_master_ip(master_params.name,
3992
                                                   master_params, ems)
3993
    result.Raise("Could not activate the master IP")
3994

    
3995

    
3996
class LUClusterDeactivateMasterIp(NoHooksLU):
3997
  """Deactivate the master IP on the master node.
3998

3999
  """
4000
  def Exec(self, feedback_fn):
4001
    """Deactivate the master IP.
4002

4003
    """
4004
    master_params = self.cfg.GetMasterNetworkParameters()
4005
    ems = self.cfg.GetUseExternalMipScript()
4006
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
4007
                                                     master_params, ems)
4008
    result.Raise("Could not deactivate the master IP")
4009

    
4010

    
4011
def _WaitForSync(lu, instance, disks=None, oneshot=False):
4012
  """Sleep and poll for an instance's disk to sync.
4013

4014
  """
4015
  if not instance.disks or disks is not None and not disks:
4016
    return True
4017

    
4018
  disks = _ExpandCheckDisks(instance, disks)
4019

    
4020
  if not oneshot:
4021
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4022

    
4023
  node = instance.primary_node
4024

    
4025
  for dev in disks:
4026
    lu.cfg.SetDiskID(dev, node)
4027

    
4028
  # TODO: Convert to utils.Retry
4029

    
4030
  retries = 0
4031
  degr_retries = 10 # in seconds, as we sleep 1 second each time
4032
  while True:
4033
    max_time = 0
4034
    done = True
4035
    cumul_degraded = False
4036
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4037
    msg = rstats.fail_msg
4038
    if msg:
4039
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4040
      retries += 1
4041
      if retries >= 10:
4042
        raise errors.RemoteError("Can't contact node %s for mirror data,"
4043
                                 " aborting." % node)
4044
      time.sleep(6)
4045
      continue
4046
    rstats = rstats.payload
4047
    retries = 0
4048
    for i, mstat in enumerate(rstats):
4049
      if mstat is None:
4050
        lu.LogWarning("Can't compute data for node %s/%s",
4051
                           node, disks[i].iv_name)
4052
        continue
4053

    
4054
      cumul_degraded = (cumul_degraded or
4055
                        (mstat.is_degraded and mstat.sync_percent is None))
4056
      if mstat.sync_percent is not None:
4057
        done = False
4058
        if mstat.estimated_time is not None:
4059
          rem_time = ("%s remaining (estimated)" %
4060
                      utils.FormatSeconds(mstat.estimated_time))
4061
          max_time = mstat.estimated_time
4062
        else:
4063
          rem_time = "no time estimate"
4064
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4065
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4066

    
4067
    # if we're done but degraded, let's do a few small retries, to
4068
    # make sure we see a stable and not transient situation; therefore
4069
    # we force restart of the loop
4070
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4071
      logging.info("Degraded disks found, %d retries left", degr_retries)
4072
      degr_retries -= 1
4073
      time.sleep(1)
4074
      continue
4075

    
4076
    if done or oneshot:
4077
      break
4078

    
4079
    time.sleep(min(60, max_time))
4080

    
4081
  if done:
4082
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4083
  return not cumul_degraded
4084

    
4085

    
4086
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4087
  """Check that mirrors are not degraded.
4088

4089
  The ldisk parameter, if True, will change the test from the
4090
  is_degraded attribute (which represents overall non-ok status for
4091
  the device(s)) to the ldisk (representing the local storage status).
4092

4093
  """
4094
  lu.cfg.SetDiskID(dev, node)
4095

    
4096
  result = True
4097

    
4098
  if on_primary or dev.AssembleOnSecondary():
4099
    rstats = lu.rpc.call_blockdev_find(node, dev)
4100
    msg = rstats.fail_msg
4101
    if msg:
4102
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4103
      result = False
4104
    elif not rstats.payload:
4105
      lu.LogWarning("Can't find disk on node %s", node)
4106
      result = False
4107
    else:
4108
      if ldisk:
4109
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4110
      else:
4111
        result = result and not rstats.payload.is_degraded
4112

    
4113
  if dev.children:
4114
    for child in dev.children:
4115
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4116

    
4117
  return result
4118

    
4119

    
4120
class LUOobCommand(NoHooksLU):
4121
  """Logical unit for OOB handling.
4122

4123
  """
4124
  REG_BGL = False
4125
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
4126

    
4127
  def ExpandNames(self):
4128
    """Gather locks we need.
4129

4130
    """
4131
    if self.op.node_names:
4132
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4133
      lock_names = self.op.node_names
4134
    else:
4135
      lock_names = locking.ALL_SET
4136

    
4137
    self.needed_locks = {
4138
      locking.LEVEL_NODE: lock_names,
4139
      }
4140

    
4141
  def CheckPrereq(self):
4142
    """Check prerequisites.
4143

4144
    This checks:
4145
     - the node exists in the configuration
4146
     - OOB is supported
4147

4148
    Any errors are signaled by raising errors.OpPrereqError.
4149

4150
    """
4151
    self.nodes = []
4152
    self.master_node = self.cfg.GetMasterNode()
4153

    
4154
    assert self.op.power_delay >= 0.0
4155

    
4156
    if self.op.node_names:
4157
      if (self.op.command in self._SKIP_MASTER and
4158
          self.master_node in self.op.node_names):
4159
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4160
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4161

    
4162
        if master_oob_handler:
4163
          additional_text = ("run '%s %s %s' if you want to operate on the"
4164
                             " master regardless") % (master_oob_handler,
4165
                                                      self.op.command,
4166
                                                      self.master_node)
4167
        else:
4168
          additional_text = "it does not support out-of-band operations"
4169

    
4170
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4171
                                    " allowed for %s; %s") %
4172
                                   (self.master_node, self.op.command,
4173
                                    additional_text), errors.ECODE_INVAL)
4174
    else:
4175
      self.op.node_names = self.cfg.GetNodeList()
4176
      if self.op.command in self._SKIP_MASTER:
4177
        self.op.node_names.remove(self.master_node)
4178

    
4179
    if self.op.command in self._SKIP_MASTER:
4180
      assert self.master_node not in self.op.node_names
4181

    
4182
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4183
      if node is None:
4184
        raise errors.OpPrereqError("Node %s not found" % node_name,
4185
                                   errors.ECODE_NOENT)
4186
      else:
4187
        self.nodes.append(node)
4188

    
4189
      if (not self.op.ignore_status and
4190
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4191
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4192
                                    " not marked offline") % node_name,
4193
                                   errors.ECODE_STATE)
4194

    
4195
  def Exec(self, feedback_fn):
4196
    """Execute OOB and return result if we expect any.
4197

4198
    """
4199
    master_node = self.master_node
4200
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)
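      # Net effect of the loop above: the variant/parameter/API-version sets
      # end up as the intersection across all nodes providing this OS, so a
      # value offered by only some of the nodes is not reported for the OS.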

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])
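    # For example, when none of "hidden", "blacklisted" or "valid" are among
    # the requested fields, the status filter built above becomes
    #   [qlang.OP_AND,
    #    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
    #    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
    #    [qlang.OP_TRUE, "valid"]]
    # once OP_AND has been prepended below.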

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
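    # Each row appended to 'output' below is a list of strings ordered like
    # self.op.output_fields, e.g. for "node,name,size" something along the
    # lines of ["node1.example.com", "vol0", "10240"] (values illustrative).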
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212

  @staticmethod
  def _CheckGroupLocks(lu):
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    """
    if self.do_grouplocks:
      self._CheckGroupLocks(lu)

    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                live_data.update(result.payload)
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    if query.IQ_NODES in self.requested_data:
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
                                            instance_list)))
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
                    for uuid in set(map(operator.attrgetter("group"),
                                        nodes.values())))
    else:
      nodes = None
      groups = None

    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo,
                                   nodes, groups)


class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]
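  # _NFLAGS lists the per-node boolean flags that are copied verbatim from the
  # opcode onto the new (or re-added) node object in Exec(); for a re-add,
  # CheckPrereq falls back to the existing node's values when the opcode
  # leaves them unset (None).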

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # We are adding a new node, so we assume it is powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: ([node], {}),
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

    
5316
  def CheckArguments(self):
5317
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5318
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5319
                self.op.master_capable, self.op.vm_capable,
5320
                self.op.secondary_ip, self.op.ndparams]
5321
    if all_mods.count(None) == len(all_mods):
5322
      raise errors.OpPrereqError("Please pass at least one modification",
5323
                                 errors.ECODE_INVAL)
5324
    if all_mods.count(True) > 1:
5325
      raise errors.OpPrereqError("Can't set the node into more than one"
5326
                                 " state at the same time",
5327
                                 errors.ECODE_INVAL)
5328

    
5329
    # Boolean value that tells us whether we might be demoting from MC
5330
    self.might_demote = (self.op.master_candidate == False or
5331
                         self.op.offline == True or
5332
                         self.op.drained == True or
5333
                         self.op.master_capable == False)
5334

    
5335
    if self.op.secondary_ip:
5336
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5337
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5338
                                   " address" % self.op.secondary_ip,
5339
                                   errors.ECODE_INVAL)
5340

    
5341
    self.lock_all = self.op.auto_promote and self.might_demote
5342
    self.lock_instances = self.op.secondary_ip is not None
5343

    
5344
  def _InstanceFilter(self, instance):
5345
    """Filter for getting affected instances.
5346

5347
    """
5348
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5349
            self.op.node_name in instance.all_nodes)
5350

    
5351
  def ExpandNames(self):
5352
    if self.lock_all:
5353
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5354
    else:
5355
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5356

    
5357
    # Since modifying a node can have severe effects on currently running
5358
    # operations the resource lock is at least acquired in shared mode
5359
    self.needed_locks[locking.LEVEL_NODE_RES] = \
5360
      self.needed_locks[locking.LEVEL_NODE]
5361

    
5362
    # Get node resource and instance locks in shared mode; they are not used
5363
    # for anything but read-only access
5364
    self.share_locks[locking.LEVEL_NODE_RES] = 1
5365
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5366

    
5367
    if self.lock_instances:
5368
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5369
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5370

    
5371
  def BuildHooksEnv(self):
5372
    """Build hooks env.
5373

5374
    This runs on the master node.
5375

5376
    """
5377
    return {
5378
      "OP_TARGET": self.op.node_name,
5379
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5380
      "OFFLINE": str(self.op.offline),
5381
      "DRAINED": str(self.op.drained),
5382
      "MASTER_CAPABLE": str(self.op.master_capable),
5383
      "VM_CAPABLE": str(self.op.vm_capable),
5384
      }
5385

    
5386
  def BuildHooksNodes(self):
5387
    """Build hooks nodes.
5388

5389
    """
5390
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5391
    return (nl, nl)
5392

    
5393
  def CheckPrereq(self):
5394
    """Check prerequisites.
5395

5396
    This only checks the instance list against the existing names.
5397

5398
    """
5399
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5400

    
5401
    if self.lock_instances:
5402
      affected_instances = \
5403
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5404

    
5405
      # Verify instance locks
5406
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5407
      wanted_instances = frozenset(affected_instances.keys())
5408
      if wanted_instances - owned_instances:
5409
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5410
                                   " secondary IP address have changed since"
5411
                                   " locks were acquired, wanted '%s', have"
5412
                                   " '%s'; retry the operation" %
5413
                                   (self.op.node_name,
5414
                                    utils.CommaJoin(wanted_instances),
5415
                                    utils.CommaJoin(owned_instances)),
5416
                                   errors.ECODE_STATE)
5417
    else:
5418
      affected_instances = None
5419

    
5420
    if (self.op.master_candidate is not None or
5421
        self.op.drained is not None or
5422
        self.op.offline is not None):
5423
      # we can't change the master's node flags
5424
      if self.op.node_name == self.cfg.GetMasterNode():
5425
        raise errors.OpPrereqError("The master role can be changed"
5426
                                   " only via master-failover",
5427
                                   errors.ECODE_INVAL)
5428

    
5429
    if self.op.master_candidate and not node.master_capable:
5430
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5431
                                 " it a master candidate" % node.name,
5432
                                 errors.ECODE_STATE)
5433

    
5434
    if self.op.vm_capable == False:
5435
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5436
      if ipri or isec:
5437
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5438
                                   " the vm_capable flag" % node.name,
5439
                                   errors.ECODE_STATE)
5440

    
5441
    if node.master_candidate and self.might_demote and not self.lock_all:
5442
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5443
      # check if after removing the current node, we're missing master
5444
      # candidates
5445
      (mc_remaining, mc_should, _) = \
5446
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5447
      if mc_remaining < mc_should:
5448
        raise errors.OpPrereqError("Not enough master candidates, please"
5449
                                   " pass auto promote option to allow"
5450
                                   " promotion", errors.ECODE_STATE)
5451

    
5452
    self.old_flags = old_flags = (node.master_candidate,
5453
                                  node.drained, node.offline)
5454
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5455
    self.old_role = old_role = self._F2R[old_flags]
5456

    
5457
    # Check for ineffective changes
5458
    for attr in self._FLAGS:
5459
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5460
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5461
        setattr(self.op, attr, None)
5462

    
5463
    # Past this point, any flag change to False means a transition
5464
    # away from the respective state, as only real changes are kept
5465

    
5466
    # TODO: We might query the real power state if it supports OOB
5467
    if _SupportsOob(self.cfg, node):
5468
      if self.op.offline is False and not (node.powered or
5469
                                           self.op.powered == True):
5470
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5471
                                    " offline status can be reset") %
5472
                                   self.op.node_name)
5473
    elif self.op.powered is not None:
5474
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5475
                                  " as it does not support out-of-band"
5476
                                  " handling") % self.op.node_name)
5477

    
5478
    # If we're being deofflined/drained, we'll MC ourself if needed
5479
    if (self.op.drained == False or self.op.offline == False or
5480
        (self.op.master_capable and not node.master_capable)):
5481
      if _DecideSelfPromotion(self):
5482
        self.op.master_candidate = True
5483
        self.LogInfo("Auto-promoting node to master candidate")
5484

    
5485
    # If we're no longer master capable, we'll demote ourselves from MC
5486
    if self.op.master_capable == False and node.master_candidate:
5487
      self.LogInfo("Demoting from master candidate")
5488
      self.op.master_candidate = False
5489

    
5490
    # Compute new role
5491
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5492
    if self.op.master_candidate:
5493
      new_role = self._ROLE_CANDIDATE
5494
    elif self.op.drained:
5495
      new_role = self._ROLE_DRAINED
5496
    elif self.op.offline:
5497
      new_role = self._ROLE_OFFLINE
5498
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5499
      # False is still in new flags, which means we're un-setting (the
5500
      # only) True flag
5501
      new_role = self._ROLE_REGULAR
5502
    else: # no new flags, nothing, keep old role
5503
      new_role = old_role
5504

    
5505
    self.new_role = new_role
5506

    
5507
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5508
      # Trying to transition out of offline status
5509
      # TODO: Use standard RPC runner, but make sure it works when the node is
5510
      # still marked offline
5511
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5512
      if result.fail_msg:
5513
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5514
                                   " to report its version: %s" %
5515
                                   (node.name, result.fail_msg),
5516
                                   errors.ECODE_STATE)
5517
      else:
5518
        self.LogWarning("Transitioning node from offline to online state"
5519
                        " without using re-add. Please make sure the node"
5520
                        " is healthy!")
5521

    
5522
    if self.op.secondary_ip:
5523
      # Ok even without locking, because this can't be changed by any LU
5524
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5525
      master_singlehomed = master.secondary_ip == master.primary_ip
5526
      if master_singlehomed and self.op.secondary_ip:
5527
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5528
                                   " homed cluster", errors.ECODE_INVAL)
5529

    
5530
      assert not (frozenset(affected_instances) -
5531
                  self.owned_locks(locking.LEVEL_INSTANCE))
5532

    
5533
      if node.offline:
5534
        if affected_instances:
5535
          raise errors.OpPrereqError("Cannot change secondary IP address:"
5536
                                     " offline node has instances (%s)"
5537
                                     " configured to use it" %
5538
                                     utils.CommaJoin(affected_instances.keys()))
5539
      else:
5540
        # On online nodes, check that no instances are running, and that
5541
        # the node has the new ip and we can reach it.
5542
        for instance in affected_instances.values():
5543
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
5544
                              msg="cannot change secondary ip")
5545

    
5546
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5547
        if master.name != node.name:
5548
          # check reachability from master secondary ip to new secondary ip
5549
          if not netutils.TcpPing(self.op.secondary_ip,
5550
                                  constants.DEFAULT_NODED_PORT,
5551
                                  source=master.secondary_ip):
5552
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5553
                                       " based ping to node daemon port",
5554
                                       errors.ECODE_ENVIRON)
5555

    
5556
    if self.op.ndparams:
5557
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5558
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5559
      self.new_ndparams = new_ndparams
5560

    
5561
  def Exec(self, feedback_fn):
5562
    """Modifies a node.
5563

5564
    """
5565
    node = self.node
5566
    old_role = self.old_role
5567
    new_role = self.new_role
5568

    
5569
    result = []
5570

    
5571
    if self.op.ndparams:
5572
      node.ndparams = self.new_ndparams
5573

    
5574
    if self.op.powered is not None:
5575
      node.powered = self.op.powered
5576

    
5577
    for attr in ["master_capable", "vm_capable"]:
5578
      val = getattr(self.op, attr)
5579
      if val is not None:
5580
        setattr(node, attr, val)
5581
        result.append((attr, str(val)))
5582

    
5583
    if new_role != old_role:
5584
      # Tell the node to demote itself, if no longer MC and not offline
5585
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5586
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5587
        if msg:
5588
          self.LogWarning("Node failed to demote itself: %s", msg)
5589

    
5590
      new_flags = self._R2F[new_role]
5591
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5592
        if of != nf:
5593
          result.append((desc, str(nf)))
5594
      (node.master_candidate, node.drained, node.offline) = new_flags
5595

    
5596
      # we locked all nodes, we adjust the CP before updating this node
5597
      if self.lock_all:
5598
        _AdjustCandidatePool(self, [node.name])
5599

    
5600
    if self.op.secondary_ip:
5601
      node.secondary_ip = self.op.secondary_ip
5602
      result.append(("secondary_ip", self.op.secondary_ip))
5603

    
5604
    # this will trigger configuration file update, if needed
5605
    self.cfg.Update(node, feedback_fn)
5606

    
5607
    # this will trigger job queue propagation or cleanup if the mc
5608
    # flag changed
5609
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5610
      self.context.ReaddNode(node)
5611

    
5612
    return result
5613

    
5614

    
5615
class LUNodePowercycle(NoHooksLU):
5616
  """Powercycles a node.
5617

5618
  """
5619
  REQ_BGL = False
5620

    
5621
  def CheckArguments(self):
5622
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5623
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5624
      raise errors.OpPrereqError("The node is the master and the force"
5625
                                 " parameter was not set",
5626
                                 errors.ECODE_INVAL)
5627

    
5628
  def ExpandNames(self):
5629
    """Locking for PowercycleNode.
5630

5631
    This is a last-resort option and shouldn't block on other
5632
    jobs. Therefore, we grab no locks.
5633

5634
    """
5635
    self.needed_locks = {}
5636

    
5637
  def Exec(self, feedback_fn):
5638
    """Reboots a node.
5639

5640
    """
5641
    result = self.rpc.call_node_powercycle(self.op.node_name,
5642
                                           self.cfg.GetHypervisorType())
5643
    result.Raise("Failed to schedule the reboot")
5644
    return result.payload
5645

    
5646

    
5647
class LUClusterQuery(NoHooksLU):
5648
  """Query cluster configuration.
5649

5650
  """
5651
  REQ_BGL = False
5652

    
5653
  def ExpandNames(self):
5654
    self.needed_locks = {}
5655

    
5656
  def Exec(self, feedback_fn):
5657
    """Return cluster config.
5658

5659
    """
5660
    cluster = self.cfg.GetClusterInfo()
5661
    os_hvp = {}
5662

    
5663
    # Filter just for enabled hypervisors
5664
    for os_name, hv_dict in cluster.os_hvp.items():
5665
      os_hvp[os_name] = {}
5666
      for hv_name, hv_params in hv_dict.items():
5667
        if hv_name in cluster.enabled_hypervisors:
5668
          os_hvp[os_name][hv_name] = hv_params
5669

    
5670
    # Convert ip_family to ip_version
5671
    primary_ip_version = constants.IP4_VERSION
5672
    if cluster.primary_ip_family == netutils.IP6Address.family:
5673
      primary_ip_version = constants.IP6_VERSION
5674

    
5675
    result = {
5676
      "software_version": constants.RELEASE_VERSION,
5677
      "protocol_version": constants.PROTOCOL_VERSION,
5678
      "config_version": constants.CONFIG_VERSION,
5679
      "os_api_version": max(constants.OS_API_VERSIONS),
5680
      "export_version": constants.EXPORT_VERSION,
5681
      "architecture": (platform.architecture()[0], platform.machine()),
5682
      "name": cluster.cluster_name,
5683
      "master": cluster.master_node,
5684
      "default_hypervisor": cluster.enabled_hypervisors[0],
5685
      "enabled_hypervisors": cluster.enabled_hypervisors,
5686
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5687
                        for hypervisor_name in cluster.enabled_hypervisors]),
5688
      "os_hvp": os_hvp,
5689
      "beparams": cluster.beparams,
5690
      "osparams": cluster.osparams,
5691
      "nicparams": cluster.nicparams,
5692
      "ndparams": cluster.ndparams,
5693
      "candidate_pool_size": cluster.candidate_pool_size,
5694
      "master_netdev": cluster.master_netdev,
5695
      "master_netmask": cluster.master_netmask,
5696
      "use_external_mip_script": cluster.use_external_mip_script,
5697
      "volume_group_name": cluster.volume_group_name,
5698
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5699
      "file_storage_dir": cluster.file_storage_dir,
5700
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5701
      "maintain_node_health": cluster.maintain_node_health,
5702
      "ctime": cluster.ctime,
5703
      "mtime": cluster.mtime,
5704
      "uuid": cluster.uuid,
5705
      "tags": list(cluster.GetTags()),
5706
      "uid_pool": cluster.uid_pool,
5707
      "default_iallocator": cluster.default_iallocator,
5708
      "reserved_lvs": cluster.reserved_lvs,
5709
      "primary_ip_version": primary_ip_version,
5710
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5711
      "hidden_os": cluster.hidden_os,
5712
      "blacklisted_os": cluster.blacklisted_os,
5713
      }
5714

    
5715
    return result
5716

    
5717

    
5718
class LUClusterConfigQuery(NoHooksLU):
5719
  """Return configuration values.
5720

5721
  """
5722
  REQ_BGL = False
5723
  _FIELDS_DYNAMIC = utils.FieldSet()
5724
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5725
                                  "watcher_pause", "volume_group_name")
5726

    
5727
  def CheckArguments(self):
5728
    _CheckOutputFields(static=self._FIELDS_STATIC,
5729
                       dynamic=self._FIELDS_DYNAMIC,
5730
                       selected=self.op.output_fields)
5731

    
5732
  def ExpandNames(self):
5733
    self.needed_locks = {}
5734

    
5735
  def Exec(self, feedback_fn):
5736
    """Dump a representation of the cluster config to the standard output.
5737

5738
    """
5739
    values = []
5740
    for field in self.op.output_fields:
5741
      if field == "cluster_name":
5742
        entry = self.cfg.GetClusterName()
5743
      elif field == "master_node":
5744
        entry = self.cfg.GetMasterNode()
5745
      elif field == "drain_flag":
5746
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5747
      elif field == "watcher_pause":
5748
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5749
      elif field == "volume_group_name":
5750
        entry = self.cfg.GetVGName()
5751
      else:
5752
        raise errors.ParameterError(field)
5753
      values.append(entry)
5754
    return values
5755

    
5756

    
5757
class LUInstanceActivateDisks(NoHooksLU):
5758
  """Bring up an instance's disks.
5759

5760
  """
5761
  REQ_BGL = False
5762

    
5763
  def ExpandNames(self):
5764
    self._ExpandAndLockInstance()
5765
    self.needed_locks[locking.LEVEL_NODE] = []
5766
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5767

    
5768
  def DeclareLocks(self, level):
5769
    if level == locking.LEVEL_NODE:
5770
      self._LockInstancesNodes()
5771

    
5772
  def CheckPrereq(self):
5773
    """Check prerequisites.
5774

5775
    This checks that the instance is in the cluster.
5776

5777
    """
5778
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5779
    assert self.instance is not None, \
5780
      "Cannot retrieve locked instance %s" % self.op.instance_name
5781
    _CheckNodeOnline(self, self.instance.primary_node)
5782

    
5783
  def Exec(self, feedback_fn):
5784
    """Activate the disks.
5785

5786
    """
5787
    disks_ok, disks_info = \
5788
              _AssembleInstanceDisks(self, self.instance,
5789
                                     ignore_size=self.op.ignore_size)
5790
    if not disks_ok:
5791
      raise errors.OpExecError("Cannot activate block devices")
5792

    
5793
    return disks_info
5794

    
5795

    
5796
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5797
                           ignore_size=False):
5798
  """Prepare the block devices for an instance.
5799

5800
  This sets up the block devices on all nodes.
5801

5802
  @type lu: L{LogicalUnit}
5803
  @param lu: the logical unit on whose behalf we execute
5804
  @type instance: L{objects.Instance}
5805
  @param instance: the instance for whose disks we assemble
5806
  @type disks: list of L{objects.Disk} or None
5807
  @param disks: which disks to assemble (or all, if None)
5808
  @type ignore_secondaries: boolean
5809
  @param ignore_secondaries: if true, errors on secondary nodes
5810
      won't result in an error return from the function
5811
  @type ignore_size: boolean
5812
  @param ignore_size: if true, the current known size of the disk
5813
      will not be used during the disk activation, useful for cases
5814
      when the size is wrong
5815
  @return: False if the operation failed, otherwise a list of
5816
      (host, instance_visible_name, node_visible_name)
5817
      with the mapping from node devices to instance devices
5818

5819
  """
5820
  device_info = []
5821
  disks_ok = True
5822
  iname = instance.name
5823
  disks = _ExpandCheckDisks(instance, disks)
5824

    
5825
  # With the two passes mechanism we try to reduce the window of
5826
  # opportunity for the race condition of switching DRBD to primary
5827
  # before handshaking occured, but we do not eliminate it
5828

    
5829
  # The proper fix would be to wait (with some limits) until the
5830
  # connection has been made and drbd transitions from WFConnection
5831
  # into any other network-connected state (Connected, SyncTarget,
5832
  # SyncSource, etc.)
5833

    
5834
  # 1st pass, assemble on all nodes in secondary mode
5835
  for idx, inst_disk in enumerate(disks):
5836
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5837
      if ignore_size:
5838
        node_disk = node_disk.Copy()
5839
        node_disk.UnsetSize()
5840
      lu.cfg.SetDiskID(node_disk, node)
5841
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5842
      msg = result.fail_msg
5843
      if msg:
5844
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5845
                           " (is_primary=False, pass=1): %s",
5846
                           inst_disk.iv_name, node, msg)
5847
        if not ignore_secondaries:
5848
          disks_ok = False
5849

    
5850
  # FIXME: race condition on drbd migration to primary
5851

    
5852
  # 2nd pass, do only the primary node
5853
  for idx, inst_disk in enumerate(disks):
5854
    dev_path = None
5855

    
5856
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5857
      if node != instance.primary_node:
5858
        continue
5859
      if ignore_size:
5860
        node_disk = node_disk.Copy()
5861
        node_disk.UnsetSize()
5862
      lu.cfg.SetDiskID(node_disk, node)
5863
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5864
      msg = result.fail_msg
5865
      if msg:
5866
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5867
                           " (is_primary=True, pass=2): %s",
5868
                           inst_disk.iv_name, node, msg)
5869
        disks_ok = False
5870
      else:
5871
        dev_path = result.payload
5872

    
5873
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5874

    
5875
  # leave the disks configured for the primary node
5876
  # this is a workaround that would be fixed better by
5877
  # improving the logical/physical id handling
5878
  for disk in disks:
5879
    lu.cfg.SetDiskID(disk, instance.primary_node)
5880

    
5881
  return disks_ok, device_info
5882

    
5883

    
5884
def _StartInstanceDisks(lu, instance, force):
5885
  """Start the disks of an instance.
5886

5887
  """
5888
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5889
                                           ignore_secondaries=force)
5890
  if not disks_ok:
5891
    _ShutdownInstanceDisks(lu, instance)
5892
    if force is not None and not force:
5893
      lu.proc.LogWarning("", hint="If the message above refers to a"
5894
                         " secondary node,"
5895
                         " you can retry the operation using '--force'.")
5896
    raise errors.OpExecError("Disk consistency error")
5897

    
5898

    
5899
class LUInstanceDeactivateDisks(NoHooksLU):
5900
  """Shutdown an instance's disks.
5901

5902
  """
5903
  REQ_BGL = False
5904

    
5905
  def ExpandNames(self):
5906
    self._ExpandAndLockInstance()
5907
    self.needed_locks[locking.LEVEL_NODE] = []
5908
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5909

    
5910
  def DeclareLocks(self, level):
5911
    if level == locking.LEVEL_NODE:
5912
      self._LockInstancesNodes()
5913

    
5914
  def CheckPrereq(self):
5915
    """Check prerequisites.
5916

5917
    This checks that the instance is in the cluster.
5918

5919
    """
5920
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5921
    assert self.instance is not None, \
5922
      "Cannot retrieve locked instance %s" % self.op.instance_name
5923

    
5924
  def Exec(self, feedback_fn):
5925
    """Deactivate the disks
5926

5927
    """
5928
    instance = self.instance
5929
    if self.op.force:
5930
      _ShutdownInstanceDisks(self, instance)
5931
    else:
5932
      _SafeShutdownInstanceDisks(self, instance)
5933

    
5934

    
5935
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5936
  """Shutdown block devices of an instance.
5937

5938
  This function checks if an instance is running, before calling
5939
  _ShutdownInstanceDisks.
5940

5941
  """
5942
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
5943
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5944

    
5945

    
5946
def _ExpandCheckDisks(instance, disks):
5947
  """Return the instance disks selected by the disks list
5948

5949
  @type disks: list of L{objects.Disk} or None
5950
  @param disks: selected disks
5951
  @rtype: list of L{objects.Disk}
5952
  @return: selected instance disks to act on
5953

5954
  """
5955
  if disks is None:
5956
    return instance.disks
5957
  else:
5958
    if not set(disks).issubset(instance.disks):
5959
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5960
                                   " target instance")
5961
    return disks
5962

    
5963

    
5964
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5965
  """Shutdown block devices of an instance.
5966

5967
  This does the shutdown on all nodes of the instance.
5968

5969
  If the ignore_primary is false, errors on the primary node are
5970
  ignored.
5971

5972
  """
5973
  all_result = True
5974
  disks = _ExpandCheckDisks(instance, disks)
5975

    
5976
  for disk in disks:
5977
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5978
      lu.cfg.SetDiskID(top_disk, node)
5979
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5980
      msg = result.fail_msg
5981
      if msg:
5982
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5983
                      disk.iv_name, node, msg)
5984
        if ((node == instance.primary_node and not ignore_primary) or
5985
            (node != instance.primary_node and not result.offline)):
5986
          all_result = False
5987
  return all_result
5988

    
5989

    
5990
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5991
  """Checks if a node has enough free memory.
5992

5993
  This function check if a given node has the needed amount of free
5994
  memory. In case the node has less memory or we cannot get the
5995
  information from the node, this function raise an OpPrereqError
5996
  exception.
5997

5998
  @type lu: C{LogicalUnit}
5999
  @param lu: a logical unit from which we get configuration data
6000
  @type node: C{str}
6001
  @param node: the node to check
6002
  @type reason: C{str}
6003
  @param reason: string to use in the error message
6004
  @type requested: C{int}
6005
  @param requested: the amount of memory in MiB to check for
6006
  @type hypervisor_name: C{str}
6007
  @param hypervisor_name: the hypervisor to ask for memory stats
6008
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
6009
      we cannot check the node
6010

6011
  """
6012
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
6013
  nodeinfo[node].Raise("Can't get data from node %s" % node,
6014
                       prereq=True, ecode=errors.ECODE_ENVIRON)
6015
  free_mem = nodeinfo[node].payload.get("memory_free", None)
6016
  if not isinstance(free_mem, int):
6017
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
6018
                               " was '%s'" % (node, free_mem),
6019
                               errors.ECODE_ENVIRON)
6020
  if requested > free_mem:
6021
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
6022
                               " needed %s MiB, available %s MiB" %
6023
                               (node, reason, requested, free_mem),
6024
                               errors.ECODE_NORES)
6025

    
6026

    
6027
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
6028
  """Checks if nodes have enough free disk space in the all VGs.
6029

6030
  This function check if all given nodes have the needed amount of
6031
  free disk. In case any node has less disk or we cannot get the
6032
  information from the node, this function raise an OpPrereqError
6033
  exception.
6034

6035
  @type lu: C{LogicalUnit}
6036
  @param lu: a logical unit from which we get configuration data
6037
  @type nodenames: C{list}
6038
  @param nodenames: the list of node names to check
6039
  @type req_sizes: C{dict}
6040
  @param req_sizes: the hash of vg and corresponding amount of disk in
6041
      MiB to check for
6042
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6043
      or we cannot check the node
6044

6045
  """
6046
  for vg, req_size in req_sizes.items():
6047
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
6048

    
6049

    
6050
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
6051
  """Checks if nodes have enough free disk space in the specified VG.
6052

6053
  This function check if all given nodes have the needed amount of
6054
  free disk. In case any node has less disk or we cannot get the
6055
  information from the node, this function raise an OpPrereqError
6056
  exception.
6057

6058
  @type lu: C{LogicalUnit}
6059
  @param lu: a logical unit from which we get configuration data
6060
  @type nodenames: C{list}
6061
  @param nodenames: the list of node names to check
6062
  @type vg: C{str}
6063
  @param vg: the volume group to check
6064
  @type requested: C{int}
6065
  @param requested: the amount of disk in MiB to check for
6066
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6067
      or we cannot check the node
6068

6069
  """
6070
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6071
  for node in nodenames:
6072
    info = nodeinfo[node]
6073
    info.Raise("Cannot get current information from node %s" % node,
6074
               prereq=True, ecode=errors.ECODE_ENVIRON)
6075
    vg_free = info.payload.get("vg_free", None)
6076
    if not isinstance(vg_free, int):
6077
      raise errors.OpPrereqError("Can't compute free disk space on node"
6078
                                 " %s for vg %s, result was '%s'" %
6079
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6080
    if requested > vg_free:
6081
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6082
                                 " vg %s: required %d MiB, available %d MiB" %
6083
                                 (node, vg, requested, vg_free),
6084
                                 errors.ECODE_NORES)
6085

    
6086

    
6087
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6088
  """Checks if nodes have enough physical CPUs
6089

6090
  This function checks if all given nodes have the needed number of
6091
  physical CPUs. In case any node has less CPUs or we cannot get the
6092
  information from the node, this function raises an OpPrereqError
6093
  exception.
6094

6095
  @type lu: C{LogicalUnit}
6096
  @param lu: a logical unit from which we get configuration data
6097
  @type nodenames: C{list}
6098
  @param nodenames: the list of node names to check
6099
  @type requested: C{int}
6100
  @param requested: the minimum acceptable number of physical CPUs
6101
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6102
      or we cannot check the node
6103

6104
  """
6105
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6106
  for node in nodenames:
6107
    info = nodeinfo[node]
6108
    info.Raise("Cannot get current information from node %s" % node,
6109
               prereq=True, ecode=errors.ECODE_ENVIRON)
6110
    num_cpus = info.payload.get("cpu_total", None)
6111
    if not isinstance(num_cpus, int):
6112
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6113
                                 " on node %s, result was '%s'" %
6114
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6115
    if requested > num_cpus:
6116
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6117
                                 "required" % (node, num_cpus, requested),
6118
                                 errors.ECODE_NORES)
6119

    
6120

    
6121
class LUInstanceStartup(LogicalUnit):
6122
  """Starts an instance.
6123

6124
  """
6125
  HPATH = "instance-start"
6126
  HTYPE = constants.HTYPE_INSTANCE
6127
  REQ_BGL = False
6128

    
6129
  def CheckArguments(self):
6130
    # extra beparams
6131
    if self.op.beparams:
6132
      # fill the beparams dict
6133
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6134

    
6135
  def ExpandNames(self):
6136
    self._ExpandAndLockInstance()
6137

    
6138
  def BuildHooksEnv(self):
6139
    """Build hooks env.
6140

6141
    This runs on master, primary and secondary nodes of the instance.
6142

6143
    """
6144
    env = {
6145
      "FORCE": self.op.force,
6146
      }
6147

    
6148
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6149

    
6150
    return env
6151

    
6152
  def BuildHooksNodes(self):
6153
    """Build hooks nodes.
6154

6155
    """
6156
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6157
    return (nl, nl)
6158

    
6159
  def CheckPrereq(self):
6160
    """Check prerequisites.
6161

6162
    This checks that the instance is in the cluster.
6163

6164
    """
6165
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6166
    assert self.instance is not None, \
6167
      "Cannot retrieve locked instance %s" % self.op.instance_name
6168

    
6169
    # extra hvparams
6170
    if self.op.hvparams:
6171
      # check hypervisor parameter syntax (locally)
6172
      cluster = self.cfg.GetClusterInfo()
6173
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6174
      filled_hvp = cluster.FillHV(instance)
6175
      filled_hvp.update(self.op.hvparams)
6176
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6177
      hv_type.CheckParameterSyntax(filled_hvp)
6178
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6179

    
6180
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6181

    
6182
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6183

    
6184
    if self.primary_offline and self.op.ignore_offline_nodes:
6185
      self.proc.LogWarning("Ignoring offline primary node")
6186

    
6187
      if self.op.hvparams or self.op.beparams:
6188
        self.proc.LogWarning("Overridden parameters are ignored")
6189
    else:
6190
      _CheckNodeOnline(self, instance.primary_node)
6191

    
6192
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6193

    
6194
      # check bridges existence
6195
      _CheckInstanceBridgesExist(self, instance)
6196

    
6197
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6198
                                                instance.name,
6199
                                                instance.hypervisor)
6200
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6201
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6202
      if not remote_info.payload: # not running already
6203
        _CheckNodeFreeMemory(self, instance.primary_node,
6204
                             "starting instance %s" % instance.name,
6205
                             bep[constants.BE_MEMORY], instance.hypervisor)
6206

    
6207
  def Exec(self, feedback_fn):
6208
    """Start the instance.
6209

6210
    """
6211
    instance = self.instance
6212
    force = self.op.force
6213

    
6214
    if not self.op.no_remember:
6215
      self.cfg.MarkInstanceUp(instance.name)
6216

    
6217
    if self.primary_offline:
6218
      assert self.op.ignore_offline_nodes
6219
      self.proc.LogInfo("Primary node offline, marked instance as started")
6220
    else:
6221
      node_current = instance.primary_node
6222

    
6223
      _StartInstanceDisks(self, instance, force)
6224

    
6225
      result = \
6226
        self.rpc.call_instance_start(node_current,
6227
                                     (instance, self.op.hvparams,
6228
                                      self.op.beparams),
6229
                                     self.op.startup_paused)
6230
      msg = result.fail_msg
6231
      if msg:
6232
        _ShutdownInstanceDisks(self, instance)
6233
        raise errors.OpExecError("Could not start instance: %s" % msg)
6234

    
6235

    
6236
class LUInstanceReboot(LogicalUnit):
6237
  """Reboot an instance.
6238

6239
  """
6240
  HPATH = "instance-reboot"
6241
  HTYPE = constants.HTYPE_INSTANCE
6242
  REQ_BGL = False
6243

    
6244
  def ExpandNames(self):
6245
    self._ExpandAndLockInstance()
6246

    
6247
  def BuildHooksEnv(self):
6248
    """Build hooks env.
6249

6250
    This runs on master, primary and secondary nodes of the instance.
6251

6252
    """
6253
    env = {
6254
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6255
      "REBOOT_TYPE": self.op.reboot_type,
6256
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6257
      }
6258

    
6259
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6260

    
6261
    return env
6262

    
6263
  def BuildHooksNodes(self):
6264
    """Build hooks nodes.
6265

6266
    """
6267
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6268
    return (nl, nl)
6269

    
6270
  def CheckPrereq(self):
6271
    """Check prerequisites.
6272

6273
    This checks that the instance is in the cluster.
6274

6275
    """
6276
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6277
    assert self.instance is not None, \
6278
      "Cannot retrieve locked instance %s" % self.op.instance_name
6279
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6280
    _CheckNodeOnline(self, instance.primary_node)
6281

    
6282
    # check bridges existence
6283
    _CheckInstanceBridgesExist(self, instance)
6284

    
6285
  def Exec(self, feedback_fn):
6286
    """Reboot the instance.
6287

6288
    """
6289
    instance = self.instance
6290
    ignore_secondaries = self.op.ignore_secondaries
6291
    reboot_type = self.op.reboot_type
6292

    
6293
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6294
                                              instance.name,
6295
                                              instance.hypervisor)
6296
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6297
    instance_running = bool(remote_info.payload)
6298

    
6299
    node_current = instance.primary_node
6300

    
6301
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6302
                                            constants.INSTANCE_REBOOT_HARD]:
6303
      for disk in instance.disks:
6304
        self.cfg.SetDiskID(disk, node_current)
6305
      result = self.rpc.call_instance_reboot(node_current, instance,
6306
                                             reboot_type,
6307
                                             self.op.shutdown_timeout)
6308
      result.Raise("Could not reboot instance")
6309
    else:
6310
      if instance_running:
6311
        result = self.rpc.call_instance_shutdown(node_current, instance,
6312
                                                 self.op.shutdown_timeout)
6313
        result.Raise("Could not shutdown instance for full reboot")
6314
        _ShutdownInstanceDisks(self, instance)
6315
      else:
6316
        self.LogInfo("Instance %s was already stopped, starting now",
6317
                     instance.name)
6318
      _StartInstanceDisks(self, instance, ignore_secondaries)
6319
      result = self.rpc.call_instance_start(node_current,
6320
                                            (instance, None, None), False)
6321
      msg = result.fail_msg
6322
      if msg:
6323
        _ShutdownInstanceDisks(self, instance)
6324
        raise errors.OpExecError("Could not start instance for"
6325
                                 " full reboot: %s" % msg)
6326

    
6327
    self.cfg.MarkInstanceUp(instance.name)
6328

    
6329

    
6330
class LUInstanceShutdown(LogicalUnit):
6331
  """Shutdown an instance.
6332

6333
  """
6334
  HPATH = "instance-stop"
6335
  HTYPE = constants.HTYPE_INSTANCE
6336
  REQ_BGL = False
6337

    
6338
  def ExpandNames(self):
6339
    self._ExpandAndLockInstance()
6340

    
6341
  def BuildHooksEnv(self):
6342
    """Build hooks env.
6343

6344
    This runs on master, primary and secondary nodes of the instance.
6345

6346
    """
6347
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6348
    env["TIMEOUT"] = self.op.timeout
6349
    return env
6350

    
6351
  def BuildHooksNodes(self):
6352
    """Build hooks nodes.
6353

6354
    """
6355
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6356
    return (nl, nl)
6357

    
6358
  def CheckPrereq(self):
6359
    """Check prerequisites.
6360

6361
    This checks that the instance is in the cluster.
6362

6363
    """
6364
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6365
    assert self.instance is not None, \
6366
      "Cannot retrieve locked instance %s" % self.op.instance_name
6367

    
6368
    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)
6369

    
6370
    self.primary_offline = \
6371
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6372

    
6373
    if self.primary_offline and self.op.ignore_offline_nodes:
6374
      self.proc.LogWarning("Ignoring offline primary node")
6375
    else:
6376
      _CheckNodeOnline(self, self.instance.primary_node)
6377

    
6378
  def Exec(self, feedback_fn):
6379
    """Shutdown the instance.
6380

6381
    """
6382
    instance = self.instance
6383
    node_current = instance.primary_node
6384
    timeout = self.op.timeout
6385

    
6386
    if not self.op.no_remember:
6387
      self.cfg.MarkInstanceDown(instance.name)
6388

    
6389
    if self.primary_offline:
6390
      assert self.op.ignore_offline_nodes
6391
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6392
    else:
6393
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6394
      msg = result.fail_msg
6395
      if msg:
6396
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6397

    
6398
      _ShutdownInstanceDisks(self, instance)
6399

    
6400

    
6401
class LUInstanceReinstall(LogicalUnit):
6402
  """Reinstall an instance.
6403

6404
  """
6405
  HPATH = "instance-reinstall"
6406
  HTYPE = constants.HTYPE_INSTANCE
6407
  REQ_BGL = False
6408

    
6409
  def ExpandNames(self):
6410
    self._ExpandAndLockInstance()
6411

    
6412
  def BuildHooksEnv(self):
6413
    """Build hooks env.
6414

6415
    This runs on master, primary and secondary nodes of the instance.
6416

6417
    """
6418
    return _BuildInstanceHookEnvByObject(self, self.instance)
6419

    
6420
  def BuildHooksNodes(self):
6421
    """Build hooks nodes.
6422

6423
    """
6424
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6425
    return (nl, nl)
6426

    
6427
  def CheckPrereq(self):
6428
    """Check prerequisites.
6429

6430
    This checks that the instance is in the cluster and is not running.
6431

6432
    """
6433
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6434
    assert instance is not None, \
6435
      "Cannot retrieve locked instance %s" % self.op.instance_name
6436
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6437
                     " offline, cannot reinstall")
6438
    for node in instance.secondary_nodes:
6439
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6440
                       " cannot reinstall")
6441

    
6442
    if instance.disk_template == constants.DT_DISKLESS:
6443
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6444
                                 self.op.instance_name,
6445
                                 errors.ECODE_INVAL)
6446
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")
6447

    
6448
    if self.op.os_type is not None:
6449
      # OS verification
6450
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6451
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6452
      instance_os = self.op.os_type
6453
    else:
6454
      instance_os = instance.os
6455

    
6456
    nodelist = list(instance.all_nodes)
6457

    
6458
    if self.op.osparams:
6459
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6460
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6461
      self.os_inst = i_osdict # the new dict (without defaults)
6462
    else:
6463
      self.os_inst = None
6464

    
6465
    self.instance = instance
6466

    
6467
  def Exec(self, feedback_fn):
6468
    """Reinstall the instance.
6469

6470
    """
6471
    inst = self.instance
6472

    
6473
    if self.op.os_type is not None:
6474
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6475
      inst.os = self.op.os_type
6476
      # Write to configuration
6477
      self.cfg.Update(inst, feedback_fn)
6478

    
6479
    _StartInstanceDisks(self, inst, None)
6480
    try:
6481
      feedback_fn("Running the instance OS create scripts...")
6482
      # FIXME: pass debug option from opcode to backend
6483
      result = self.rpc.call_instance_os_add(inst.primary_node,
6484
                                             (inst, self.os_inst), True,
6485
                                             self.op.debug_level)
6486
      result.Raise("Could not install OS for instance %s on node %s" %
6487
                   (inst.name, inst.primary_node))
6488
    finally:
6489
      _ShutdownInstanceDisks(self, inst)
6490

    
6491

    
6492
class LUInstanceRecreateDisks(LogicalUnit):
6493
  """Recreate an instance's missing disks.
6494

6495
  """
6496
  HPATH = "instance-recreate-disks"
6497
  HTYPE = constants.HTYPE_INSTANCE
6498
  REQ_BGL = False
6499

    
6500
  def CheckArguments(self):
6501
    # normalise the disk list
6502
    self.op.disks = sorted(frozenset(self.op.disks))
6503

    
6504
  def ExpandNames(self):
6505
    self._ExpandAndLockInstance()
6506
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6507
    if self.op.nodes:
6508
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6509
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6510
    else:
6511
      self.needed_locks[locking.LEVEL_NODE] = []
6512

    
6513
  def DeclareLocks(self, level):
6514
    if level == locking.LEVEL_NODE:
6515
      # if we replace the nodes, we only need to lock the old primary,
6516
      # otherwise we need to lock all nodes for disk re-creation
6517
      primary_only = bool(self.op.nodes)
6518
      self._LockInstancesNodes(primary_only=primary_only)
6519
    elif level == locking.LEVEL_NODE_RES:
6520
      # Copy node locks
6521
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6522
        self.needed_locks[locking.LEVEL_NODE][:]
6523

    
6524
  def BuildHooksEnv(self):
6525
    """Build hooks env.
6526

6527
    This runs on master, primary and secondary nodes of the instance.
6528

6529
    """
6530
    return _BuildInstanceHookEnvByObject(self, self.instance)
6531

    
6532
  def BuildHooksNodes(self):
6533
    """Build hooks nodes.
6534

6535
    """
6536
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6537
    return (nl, nl)
6538

    
6539
  def CheckPrereq(self):
6540
    """Check prerequisites.
6541

6542
    This checks that the instance is in the cluster and is not running.
6543

6544
    """
6545
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6546
    assert instance is not None, \
6547
      "Cannot retrieve locked instance %s" % self.op.instance_name
6548
    if self.op.nodes:
6549
      if len(self.op.nodes) != len(instance.all_nodes):
6550
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6551
                                   " %d replacement nodes were specified" %
6552
                                   (instance.name, len(instance.all_nodes),
6553
                                    len(self.op.nodes)),
6554
                                   errors.ECODE_INVAL)
6555
      assert instance.disk_template != constants.DT_DRBD8 or \
6556
          len(self.op.nodes) == 2
6557
      assert instance.disk_template != constants.DT_PLAIN or \
6558
          len(self.op.nodes) == 1
6559
      primary_node = self.op.nodes[0]
6560
    else:
6561
      primary_node = instance.primary_node
6562
    _CheckNodeOnline(self, primary_node)
6563

    
6564
    if instance.disk_template == constants.DT_DISKLESS:
6565
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6566
                                 self.op.instance_name, errors.ECODE_INVAL)
6567
    # if we replace nodes *and* the old primary is offline, we don't
6568
    # check
6569
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6570
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6571
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6572
    if not (self.op.nodes and old_pnode.offline):
6573
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6574
                          msg="cannot recreate disks")
6575

    
6576
    if not self.op.disks:
6577
      self.op.disks = range(len(instance.disks))
6578
    else:
6579
      for idx in self.op.disks:
6580
        if idx >= len(instance.disks):
6581
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6582
                                     errors.ECODE_INVAL)
6583
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6584
      raise errors.OpPrereqError("Can't recreate disks partially and"
6585
                                 " change the nodes at the same time",
6586
                                 errors.ECODE_INVAL)
6587
    self.instance = instance
6588

    
6589
  def Exec(self, feedback_fn):
6590
    """Recreate the disks.
6591

6592
    """
6593
    instance = self.instance
6594

    
6595
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6596
            self.owned_locks(locking.LEVEL_NODE_RES))
6597

    
6598
    to_skip = []
6599
    mods = [] # keeps track of needed logical_id changes
6600

    
6601
    for idx, disk in enumerate(instance.disks):
6602
      if idx not in self.op.disks: # disk idx has not been passed in
6603
        to_skip.append(idx)
6604
        continue
6605
      # update secondaries for disks, if needed
6606
      if self.op.nodes:
6607
        if disk.dev_type == constants.LD_DRBD8:
6608
          # need to update the nodes and minors
6609
          assert len(self.op.nodes) == 2
6610
          assert len(disk.logical_id) == 6 # otherwise disk internals
6611
                                           # have changed
6612
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6613
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6614
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6615
                    new_minors[0], new_minors[1], old_secret)
6616
          assert len(disk.logical_id) == len(new_id)
6617
          mods.append((idx, new_id))
6618

    
6619
    # now that we have passed all asserts above, we can apply the mods
6620
    # in a single run (to avoid partial changes)
6621
    for idx, new_id in mods:
6622
      instance.disks[idx].logical_id = new_id
6623

    
6624
    # change primary node, if needed
6625
    if self.op.nodes:
6626
      instance.primary_node = self.op.nodes[0]
6627
      self.LogWarning("Changing the instance's nodes, you will have to"
6628
                      " remove any disks left on the older nodes manually")
6629

    
6630
    if self.op.nodes:
6631
      self.cfg.Update(instance, feedback_fn)
6632

    
6633
    _CreateDisks(self, instance, to_skip=to_skip)
6634

    
6635

    
6636
class LUInstanceRename(LogicalUnit):
6637
  """Rename an instance.
6638

6639
  """
6640
  HPATH = "instance-rename"
6641
  HTYPE = constants.HTYPE_INSTANCE
6642

    
6643
  def CheckArguments(self):
6644
    """Check arguments.
6645

6646
    """
6647
    if self.op.ip_check and not self.op.name_check:
6648
      # TODO: make the ip check more flexible and not depend on the name check
6649
      raise errors.OpPrereqError("IP address check requires a name check",
6650
                                 errors.ECODE_INVAL)
6651

    
6652
  def BuildHooksEnv(self):
6653
    """Build hooks env.
6654

6655
    This runs on master, primary and secondary nodes of the instance.
6656

6657
    """
6658
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6659
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6660
    return env
6661

    
6662
  def BuildHooksNodes(self):
6663
    """Build hooks nodes.
6664

6665
    """
6666
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6667
    return (nl, nl)
6668

    
6669
  def CheckPrereq(self):
6670
    """Check prerequisites.
6671

6672
    This checks that the instance is in the cluster and is not running.
6673

6674
    """
6675
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6676
                                                self.op.instance_name)
6677
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6678
    assert instance is not None
6679
    _CheckNodeOnline(self, instance.primary_node)
6680
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
6681
                        msg="cannot rename")
6682
    self.instance = instance
6683

    
6684
    new_name = self.op.new_name
6685
    if self.op.name_check:
6686
      hostname = netutils.GetHostname(name=new_name)
6687
      if hostname.name != new_name:
6688
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6689
                     hostname.name)
6690
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6691
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6692
                                    " same as given hostname '%s'") %
6693
                                    (hostname.name, self.op.new_name),
6694
                                    errors.ECODE_INVAL)
6695
      new_name = self.op.new_name = hostname.name
6696
      if (self.op.ip_check and
6697
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6698
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6699
                                   (hostname.ip, new_name),
6700
                                   errors.ECODE_NOTUNIQUE)
6701

    
6702
    instance_list = self.cfg.GetInstanceList()
6703
    if new_name in instance_list and new_name != instance.name:
6704
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6705
                                 new_name, errors.ECODE_EXISTS)
6706

    
6707
  def Exec(self, feedback_fn):
6708
    """Rename the instance.
6709

6710
    """
6711
    inst = self.instance
6712
    old_name = inst.name
6713

    
6714
    rename_file_storage = False
6715
    if (inst.disk_template in constants.DTS_FILEBASED and
6716
        self.op.new_name != inst.name):
6717
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6718
      rename_file_storage = True
6719

    
6720
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6721
    # Change the instance lock. This is definitely safe while we hold the BGL.
6722
    # Otherwise the new lock would have to be added in acquired mode.
6723
    assert self.REQ_BGL
6724
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6725
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6726

    
6727
    # re-read the instance from the configuration after rename
6728
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6729

    
6730
    if rename_file_storage:
6731
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6732
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6733
                                                     old_file_storage_dir,
6734
                                                     new_file_storage_dir)
6735
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6736
                   " (but the instance has been renamed in Ganeti)" %
6737
                   (inst.primary_node, old_file_storage_dir,
6738
                    new_file_storage_dir))
6739

    
6740
    _StartInstanceDisks(self, inst, None)
6741
    try:
6742
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6743
                                                 old_name, self.op.debug_level)
6744
      msg = result.fail_msg
6745
      if msg:
6746
        msg = ("Could not run OS rename script for instance %s on node %s"
6747
               " (but the instance has been renamed in Ganeti): %s" %
6748
               (inst.name, inst.primary_node, msg))
6749
        self.proc.LogWarning(msg)
6750
    finally:
6751
      _ShutdownInstanceDisks(self, inst)
6752

    
6753
    return inst.name
6754

    
6755

    
6756
class LUInstanceRemove(LogicalUnit):
6757
  """Remove an instance.
6758

6759
  """
6760
  HPATH = "instance-remove"
6761
  HTYPE = constants.HTYPE_INSTANCE
6762
  REQ_BGL = False
6763

    
6764
  def ExpandNames(self):
6765
    self._ExpandAndLockInstance()
6766
    self.needed_locks[locking.LEVEL_NODE] = []
6767
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6768
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6769

    
6770
  def DeclareLocks(self, level):
6771
    if level == locking.LEVEL_NODE:
6772
      self._LockInstancesNodes()
6773
    elif level == locking.LEVEL_NODE_RES:
6774
      # Copy node locks
6775
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6776
        self.needed_locks[locking.LEVEL_NODE][:]
6777

    
6778
  def BuildHooksEnv(self):
6779
    """Build hooks env.
6780

6781
    This runs on master, primary and secondary nodes of the instance.
6782

6783
    """
6784
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6785
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6786
    return env
6787

    
6788
  def BuildHooksNodes(self):
6789
    """Build hooks nodes.
6790

6791
    """
6792
    nl = [self.cfg.GetMasterNode()]
6793
    nl_post = list(self.instance.all_nodes) + nl
6794
    return (nl, nl_post)
6795

    
6796
  def CheckPrereq(self):
6797
    """Check prerequisites.
6798

6799
    This checks that the instance is in the cluster.
6800

6801
    """
6802
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6803
    assert self.instance is not None, \
6804
      "Cannot retrieve locked instance %s" % self.op.instance_name
6805

    
6806
  def Exec(self, feedback_fn):
6807
    """Remove the instance.
6808

6809
    """
6810
    instance = self.instance
6811
    logging.info("Shutting down instance %s on node %s",
6812
                 instance.name, instance.primary_node)
6813

    
6814
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6815
                                             self.op.shutdown_timeout)
6816
    msg = result.fail_msg
6817
    if msg:
6818
      if self.op.ignore_failures:
6819
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6820
      else:
6821
        raise errors.OpExecError("Could not shutdown instance %s on"
6822
                                 " node %s: %s" %
6823
                                 (instance.name, instance.primary_node, msg))
6824

    
6825
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6826
            self.owned_locks(locking.LEVEL_NODE_RES))
6827
    assert not (set(instance.all_nodes) -
6828
                self.owned_locks(locking.LEVEL_NODE)), \
6829
      "Not owning correct locks"
6830

    
6831
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6832

    
6833

    
6834
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6835
  """Utility function to remove an instance.
6836

6837
  """
6838
  logging.info("Removing block devices for instance %s", instance.name)
6839

    
6840
  if not _RemoveDisks(lu, instance):
6841
    if not ignore_failures:
6842
      raise errors.OpExecError("Can't remove instance's disks")
6843
    feedback_fn("Warning: can't remove instance's disks")
6844

    
6845
  logging.info("Removing instance %s out of cluster config", instance.name)
6846

    
6847
  lu.cfg.RemoveInstance(instance.name)
6848

    
6849
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6850
    "Instance lock removal conflict"
6851

    
6852
  # Remove lock for the instance
6853
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6854

    
6855

    
6856
class LUInstanceQuery(NoHooksLU):
6857
  """Logical unit for querying instances.
6858

6859
  """
6860
  # pylint: disable=W0142
6861
  REQ_BGL = False
6862

    
6863
  def CheckArguments(self):
6864
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6865
                             self.op.output_fields, self.op.use_locking)
6866

    
6867
  def ExpandNames(self):
6868
    self.iq.ExpandNames(self)
6869

    
6870
  def DeclareLocks(self, level):
6871
    self.iq.DeclareLocks(self, level)
6872

    
6873
  def Exec(self, feedback_fn):
6874
    return self.iq.OldStyleQuery(self)
6875

    
6876

    
6877
class LUInstanceFailover(LogicalUnit):
6878
  """Failover an instance.
6879

6880
  """
6881
  HPATH = "instance-failover"
6882
  HTYPE = constants.HTYPE_INSTANCE
6883
  REQ_BGL = False
6884

    
6885
  def CheckArguments(self):
6886
    """Check the arguments.
6887

6888
    """
6889
    self.iallocator = getattr(self.op, "iallocator", None)
6890
    self.target_node = getattr(self.op, "target_node", None)
6891

    
6892
  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


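# Illustrative sketch (not part of the original file): the two logical units
# above are driven by client-submitted opcodes.  Mirroring the way
# LUNodeMigrate.Exec below builds its per-instance jobs, a single live
# migration could be requested roughly like this (field names are the ones
# this module reads from self.op; the instance name is made up):
#
#   op = opcodes.OpInstanceMigrate(instance_name="inst1.example.com",
#                                  mode=constants.HT_MIGRATION_LIVE,
#                                  live=None,        # 'live' and 'mode' are
#                                  iallocator=None,  # mutually exclusive, see
#                                  target_node=None) # TLMigrateInstance
#
# For a failover the corresponding opcode of LUInstanceFailover is used
# instead, carrying ignore_consistency and shutdown_timeout as reflected in
# its hook environment above.

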
class LUInstanceMove(LogicalUnit):
7032
  """Move an instance by data-copying.
7033

7034
  """
7035
  HPATH = "instance-move"
7036
  HTYPE = constants.HTYPE_INSTANCE
7037
  REQ_BGL = False
7038

    
7039
  def ExpandNames(self):
7040
    self._ExpandAndLockInstance()
7041
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
7042
    self.op.target_node = target_node
7043
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
7044
    self.needed_locks[locking.LEVEL_NODE_RES] = []
7045
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7046

    
7047
  def DeclareLocks(self, level):
7048
    if level == locking.LEVEL_NODE:
7049
      self._LockInstancesNodes(primary_only=True)
7050
    elif level == locking.LEVEL_NODE_RES:
7051
      # Copy node locks
7052
      self.needed_locks[locking.LEVEL_NODE_RES] = \
7053
        self.needed_locks[locking.LEVEL_NODE][:]
7054

    
7055
  def BuildHooksEnv(self):
7056
    """Build hooks env.
7057

7058
    This runs on master, primary and secondary nodes of the instance.
7059

7060
    """
7061
    env = {
7062
      "TARGET_NODE": self.op.target_node,
7063
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7064
      }
7065
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7066
    return env
7067

    
7068
  def BuildHooksNodes(self):
7069
    """Build hooks nodes.
7070

7071
    """
7072
    nl = [
7073
      self.cfg.GetMasterNode(),
7074
      self.instance.primary_node,
7075
      self.op.target_node,
7076
      ]
7077
    return (nl, nl)
7078

    
7079
  def CheckPrereq(self):
7080
    """Check prerequisites.
7081

7082
    This checks that the instance is in the cluster.
7083

7084
    """
7085
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7086
    assert self.instance is not None, \
7087
      "Cannot retrieve locked instance %s" % self.op.instance_name
7088

    
7089
    node = self.cfg.GetNodeInfo(self.op.target_node)
7090
    assert node is not None, \
7091
      "Cannot retrieve locked node %s" % self.op.target_node
7092

    
7093
    self.target_node = target_node = node.name
7094

    
7095
    if target_node == instance.primary_node:
7096
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7097
                                 (instance.name, target_node),
7098
                                 errors.ECODE_STATE)
7099

    
7100
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7101

    
7102
    for idx, dsk in enumerate(instance.disks):
7103
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7104
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7105
                                   " cannot copy" % idx, errors.ECODE_STATE)
7106

    
7107
    _CheckNodeOnline(self, target_node)
7108
    _CheckNodeNotDrained(self, target_node)
7109
    _CheckNodeVmCapable(self, target_node)
7110

    
7111
    if instance.admin_state == constants.ADMINST_UP:
7112
      # check memory requirements on the target node
7113
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7114
                           instance.name, bep[constants.BE_MEMORY],
7115
                           instance.hypervisor)
7116
    else:
7117
      self.LogInfo("Not checking memory on the secondary node as"
7118
                   " instance will not be started")
7119

    
7120
    # check bridge existence
7121
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7122

    
7123
  def Exec(self, feedback_fn):
7124
    """Move an instance.
7125

7126
    The move is done by shutting it down on its present node, copying
7127
    the data over (slow) and starting it on the new node.
7128

7129
    """
7130
    instance = self.instance
7131

    
7132
    source_node = instance.primary_node
7133
    target_node = self.target_node
7134

    
7135
    self.LogInfo("Shutting down instance %s on source node %s",
7136
                 instance.name, source_node)
7137

    
7138
    assert (self.owned_locks(locking.LEVEL_NODE) ==
7139
            self.owned_locks(locking.LEVEL_NODE_RES))
7140

    
7141
    result = self.rpc.call_instance_shutdown(source_node, instance,
7142
                                             self.op.shutdown_timeout)
7143
    msg = result.fail_msg
7144
    if msg:
7145
      if self.op.ignore_consistency:
7146
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7147
                             " Proceeding anyway. Please make sure node"
7148
                             " %s is down. Error details: %s",
7149
                             instance.name, source_node, source_node, msg)
7150
      else:
7151
        raise errors.OpExecError("Could not shutdown instance %s on"
7152
                                 " node %s: %s" %
7153
                                 (instance.name, source_node, msg))
7154

    
7155
    # create the target disks
7156
    try:
7157
      _CreateDisks(self, instance, target_node=target_node)
7158
    except errors.OpExecError:
7159
      self.LogWarning("Device creation failed, reverting...")
7160
      try:
7161
        _RemoveDisks(self, instance, target_node=target_node)
7162
      finally:
7163
        self.cfg.ReleaseDRBDMinors(instance.name)
7164
        raise
7165

    
7166
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7167

    
7168
    errs = []
7169
    # activate, get path, copy the data over
7170
    for idx, disk in enumerate(instance.disks):
7171
      self.LogInfo("Copying data for disk %d", idx)
7172
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7173
                                               instance.name, True, idx)
7174
      if result.fail_msg:
7175
        self.LogWarning("Can't assemble newly created disk %d: %s",
7176
                        idx, result.fail_msg)
7177
        errs.append(result.fail_msg)
7178
        break
7179
      dev_path = result.payload
7180
      result = self.rpc.call_blockdev_export(source_node, disk,
7181
                                             target_node, dev_path,
7182
                                             cluster_name)
7183
      if result.fail_msg:
7184
        self.LogWarning("Can't copy data over for disk %d: %s",
7185
                        idx, result.fail_msg)
7186
        errs.append(result.fail_msg)
7187
        break
7188

    
7189
    if errs:
7190
      self.LogWarning("Some disks failed to copy, aborting")
7191
      try:
7192
        _RemoveDisks(self, instance, target_node=target_node)
7193
      finally:
7194
        self.cfg.ReleaseDRBDMinors(instance.name)
7195
        raise errors.OpExecError("Errors during disk copy: %s" %
7196
                                 (",".join(errs),))
7197

    
7198
    instance.primary_node = target_node
7199
    self.cfg.Update(instance, feedback_fn)
7200

    
7201
    self.LogInfo("Removing the disks on the original node")
7202
    _RemoveDisks(self, instance, target_node=source_node)
7203

    
7204
    # Only start the instance if it's marked as up
7205
    if instance.admin_state == constants.ADMINST_UP:
7206
      self.LogInfo("Starting instance %s on node %s",
7207
                   instance.name, target_node)
7208

    
7209
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7210
                                           ignore_secondaries=True)
7211
      if not disks_ok:
7212
        _ShutdownInstanceDisks(self, instance)
7213
        raise errors.OpExecError("Can't activate the instance's disks")
7214

    
7215
      result = self.rpc.call_instance_start(target_node,
7216
                                            (instance, None, None), False)
7217
      msg = result.fail_msg
7218
      if msg:
7219
        _ShutdownInstanceDisks(self, instance)
7220
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7221
                                 (instance.name, target_node, msg))
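
# Illustrative sketch (not part of the original file): LUInstanceMove above
# copies data with call_blockdev_assemble on the target and
# call_blockdev_export on the source, which is why it only accepts plain
# LD_LV/LD_FILE disk layouts.  A move request could be submitted roughly as
# follows (field names are the self.op attributes referenced above, the
# opcode name follows the usual LU/opcode naming and the values are made up):
#
#   op = opcodes.OpInstanceMove(
#     instance_name="inst1.example.com",
#     target_node="node3.example.com",
#     shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT,
#     ignore_consistency=False)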


class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    pass

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    # Prepare one migration job per primary instance of the node
    jobs = [
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
                                 mode=self.op.mode,
                                 live=self.op.live,
                                 iallocator=self.op.iallocator,
                                 target_node=self.op.target_node)]
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
      ]

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


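# Illustrative sketch (not part of the original file): the jobs value built in
# LUNodeMigrate.Exec above is a list of single-opcode jobs, one per primary
# instance of the node being migrated.  For two primary instances it has this
# shape (instance names are made up):
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com", ...)],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com", ...)],
#     ]
#
# Each inner list becomes one job of its own, so the migrations are not
# serialized inside a single job.

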
class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout to use for the
      shutdown of the instance

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds

  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
7333
    """Check prerequisites.
7334

7335
    This checks that the instance is in the cluster.
7336

7337
    """
7338
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7339
    instance = self.cfg.GetInstanceInfo(instance_name)
7340
    assert instance is not None
7341
    self.instance = instance
7342

    
7343
    if (not self.cleanup and
7344
        not instance.admin_state == constants.ADMINST_UP and
7345
        not self.failover and self.fallback):
7346
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7347
                      " switching to failover")
7348
      self.failover = True
7349

    
7350
    if instance.disk_template not in constants.DTS_MIRRORED:
7351
      if self.failover:
7352
        text = "failovers"
7353
      else:
7354
        text = "migrations"
7355
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7356
                                 " %s" % (instance.disk_template, text),
7357
                                 errors.ECODE_STATE)
7358

    
7359
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7360
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7361

    
7362
      if self.lu.op.iallocator:
7363
        self._RunAllocator()
7364
      else:
7365
        # We set self.target_node as it is required by
7366
        # BuildHooksEnv
7367
        self.target_node = self.lu.op.target_node
7368

    
7369
      # self.target_node is already populated, either directly or by the
7370
      # iallocator run
7371
      target_node = self.target_node
7372
      if self.target_node == instance.primary_node:
7373
        raise errors.OpPrereqError("Cannot migrate instance %s"
7374
                                   " to its primary (%s)" %
7375
                                   (instance.name, instance.primary_node))
7376

    
7377
      if len(self.lu.tasklets) == 1:
7378
        # It is safe to release locks only when we're the only tasklet
7379
        # in the LU
7380
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7381
                      keep=[instance.primary_node, self.target_node])
7382

    
7383
    else:
7384
      secondary_nodes = instance.secondary_nodes
7385
      if not secondary_nodes:
7386
        raise errors.ConfigurationError("No secondary node but using"
7387
                                        " %s disk template" %
7388
                                        instance.disk_template)
7389
      target_node = secondary_nodes[0]
7390
      if self.lu.op.iallocator or (self.lu.op.target_node and
7391
                                   self.lu.op.target_node != target_node):
7392
        if self.failover:
7393
          text = "failed over"
7394
        else:
7395
          text = "migrated"
7396
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7397
                                   " be %s to arbitrary nodes"
7398
                                   " (neither an iallocator nor a target"
7399
                                   " node can be passed)" %
7400
                                   (instance.disk_template, text),
7401
                                   errors.ECODE_INVAL)
7402

    
7403
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7404

    
7405
    # check memory requirements on the secondary node
7406
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
7407
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7408
                           instance.name, i_be[constants.BE_MEMORY],
7409
                           instance.hypervisor)
7410
    else:
7411
      self.lu.LogInfo("Not checking memory on the secondary node as"
7412
                      " instance will not be started")
7413

    
7414
    # check bridge existence
7415
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7416

    
7417
    if not self.cleanup:
7418
      _CheckNodeNotDrained(self.lu, target_node)
7419
      if not self.failover:
7420
        result = self.rpc.call_instance_migratable(instance.primary_node,
7421
                                                   instance)
7422
        if result.fail_msg and self.fallback:
7423
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7424
                          " failover")
7425
          self.failover = True
7426
        else:
7427
          result.Raise("Can't migrate, please use failover",
7428
                       prereq=True, ecode=errors.ECODE_STATE)
7429

    
7430
    assert not (self.failover and self.cleanup)
7431

    
7432
    if not self.failover:
7433
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7434
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7435
                                   " parameters are accepted",
7436
                                   errors.ECODE_INVAL)
7437
      if self.lu.op.live is not None:
7438
        if self.lu.op.live:
7439
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7440
        else:
7441
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7442
        # reset the 'live' parameter to None so that repeated
7443
        # invocations of CheckPrereq do not raise an exception
7444
        self.lu.op.live = None
7445
      elif self.lu.op.mode is None:
7446
        # read the default value from the hypervisor
7447
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7448
                                                skip_globals=False)
7449
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7450

    
7451
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7452
    else:
7453
      # Failover is never live
7454
      self.live = False
7455

    
7456
  def _RunAllocator(self):
7457
    """Run the allocator based on input opcode.
7458

7459
    """
7460
    ial = IAllocator(self.cfg, self.rpc,
7461
                     mode=constants.IALLOCATOR_MODE_RELOC,
7462
                     name=self.instance_name,
7463
                     # TODO See why hail breaks with a single node below
7464
                     relocate_from=[self.instance.primary_node,
7465
                                    self.instance.primary_node],
7466
                     )
7467

    
7468
    ial.Run(self.lu.op.iallocator)
7469

    
7470
    if not ial.success:
7471
      raise errors.OpPrereqError("Can't compute nodes using"
7472
                                 " iallocator '%s': %s" %
7473
                                 (self.lu.op.iallocator, ial.info),
7474
                                 errors.ECODE_NORES)
7475
    if len(ial.result) != ial.required_nodes:
7476
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7477
                                 " of nodes (%s), required %s" %
7478
                                 (self.lu.op.iallocator, len(ial.result),
7479
                                  ial.required_nodes), errors.ECODE_FAULT)
7480
    self.target_node = ial.result[0]
7481
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7482
                 self.instance_name, self.lu.op.iallocator,
7483
                 utils.CommaJoin(ial.result))
7484

    
7485
  def _WaitUntilSync(self):
7486
    """Poll with custom rpc for disk sync.
7487

7488
    This uses our own step-based rpc call.
7489

7490
    """
7491
    self.feedback_fn("* wait until resync is done")
7492
    all_done = False
7493
    while not all_done:
7494
      all_done = True
7495
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7496
                                            self.nodes_ip,
7497
                                            self.instance.disks)
7498
      min_percent = 100
7499
      for node, nres in result.items():
7500
        nres.Raise("Cannot resync disks on node %s" % node)
7501
        node_done, node_percent = nres.payload
7502
        all_done = all_done and node_done
7503
        if node_percent is not None:
7504
          min_percent = min(min_percent, node_percent)
7505
      if not all_done:
7506
        if min_percent < 100:
7507
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7508
        time.sleep(2)
7509

    
7510
  def _EnsureSecondary(self, node):
7511
    """Demote a node to secondary.
7512

7513
    """
7514
    self.feedback_fn("* switching node %s to secondary mode" % node)
7515

    
7516
    for dev in self.instance.disks:
7517
      self.cfg.SetDiskID(dev, node)
7518

    
7519
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7520
                                          self.instance.disks)
7521
    result.Raise("Cannot change disk to secondary on node %s" % node)
7522

    
7523
  def _GoStandalone(self):
7524
    """Disconnect from the network.
7525

7526
    """
7527
    self.feedback_fn("* changing into standalone mode")
7528
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7529
                                               self.instance.disks)
7530
    for node, nres in result.items():
7531
      nres.Raise("Cannot disconnect disks node %s" % node)
7532

    
7533
  def _GoReconnect(self, multimaster):
7534
    """Reconnect to the network.
7535

7536
    """
7537
    if multimaster:
7538
      msg = "dual-master"
7539
    else:
7540
      msg = "single-master"
7541
    self.feedback_fn("* changing disks into %s mode" % msg)
7542
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7543
                                           self.instance.disks,
7544
                                           self.instance.name, multimaster)
7545
    for node, nres in result.items():
7546
      nres.Raise("Cannot change disks config on node %s" % node)
7547

    
7548
  def _ExecCleanup(self):
7549
    """Try to cleanup after a failed migration.
7550

7551
    The cleanup is done by:
7552
      - check that the instance is running only on one node
7553
        (and update the config if needed)
7554
      - change disks on its secondary node to secondary
7555
      - wait until disks are fully synchronized
7556
      - disconnect from the network
7557
      - change disks into single-master mode
7558
      - wait again until disks are fully synchronized
7559

7560
    """
7561
    instance = self.instance
7562
    target_node = self.target_node
7563
    source_node = self.source_node
7564

    
7565
    # check running on only one node
7566
    self.feedback_fn("* checking where the instance actually runs"
7567
                     " (if this hangs, the hypervisor might be in"
7568
                     " a bad state)")
7569
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7570
    for node, result in ins_l.items():
7571
      result.Raise("Can't contact node %s" % node)
7572

    
7573
    runningon_source = instance.name in ins_l[source_node].payload
7574
    runningon_target = instance.name in ins_l[target_node].payload
7575

    
7576
    if runningon_source and runningon_target:
7577
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7578
                               " or the hypervisor is confused; you will have"
7579
                               " to ensure manually that it runs only on one"
7580
                               " and restart this operation")
7581

    
7582
    if not (runningon_source or runningon_target):
7583
      raise errors.OpExecError("Instance does not seem to be running at all;"
7584
                               " in this case it's safer to repair by"
7585
                               " running 'gnt-instance stop' to ensure disk"
7586
                               " shutdown, and then restarting it")
7587

    
7588
    if runningon_target:
7589
      # the migration has actually succeeded, we need to update the config
7590
      self.feedback_fn("* instance running on secondary node (%s),"
7591
                       " updating config" % target_node)
7592
      instance.primary_node = target_node
7593
      self.cfg.Update(instance, self.feedback_fn)
7594
      demoted_node = source_node
7595
    else:
7596
      self.feedback_fn("* instance confirmed to be running on its"
7597
                       " primary node (%s)" % source_node)
7598
      demoted_node = target_node
7599

    
7600
    if instance.disk_template in constants.DTS_INT_MIRROR:
7601
      self._EnsureSecondary(demoted_node)
7602
      try:
7603
        self._WaitUntilSync()
7604
      except errors.OpExecError:
7605
        # we ignore errors here, since if the device is standalone, it
7606
        # won't be able to sync
7607
        pass
7608
      self._GoStandalone()
7609
      self._GoReconnect(False)
7610
      self._WaitUntilSync()
7611

    
7612
    self.feedback_fn("* done")
7613

    
7614
  def _RevertDiskStatus(self):
7615
    """Try to revert the disk status after a failed migration.
7616

7617
    """
7618
    target_node = self.target_node
7619
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7620
      return
7621

    
7622
    try:
7623
      self._EnsureSecondary(target_node)
7624
      self._GoStandalone()
7625
      self._GoReconnect(False)
7626
      self._WaitUntilSync()
7627
    except errors.OpExecError, err:
7628
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7629
                         " please try to recover the instance manually;"
7630
                         " error '%s'" % str(err))
7631

    
7632
  def _AbortMigration(self):
7633
    """Call the hypervisor code to abort a started migration.
7634

7635
    """
7636
    instance = self.instance
7637
    target_node = self.target_node
7638
    source_node = self.source_node
7639
    migration_info = self.migration_info
7640

    
7641
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7642
                                                                 instance,
7643
                                                                 migration_info,
7644
                                                                 False)
7645
    abort_msg = abort_result.fail_msg
7646
    if abort_msg:
7647
      logging.error("Aborting migration failed on target node %s: %s",
7648
                    target_node, abort_msg)
7649
      # Don't raise an exception here, as we still have to try to revert the
7650
      # disk status, even if this step failed.
7651

    
7652
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7653
        instance, False, self.live)
7654
    abort_msg = abort_result.fail_msg
7655
    if abort_msg:
7656
      logging.error("Aborting migration failed on source node %s: %s",
7657
                    source_node, abort_msg)
7658

    
7659
  def _ExecMigration(self):
7660
    """Migrate an instance.
7661

7662
    The migrate is done by:
7663
      - change the disks into dual-master mode
7664
      - wait until disks are fully synchronized again
7665
      - migrate the instance
7666
      - change disks on the new secondary node (the old primary) to secondary
7667
      - wait until disks are fully synchronized
7668
      - change disks into single-master mode
7669

7670
    """
7671
    instance = self.instance
7672
    target_node = self.target_node
7673
    source_node = self.source_node
7674

    
7675
    # Check for hypervisor version mismatch and warn the user.
7676
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7677
                                       None, self.instance.hypervisor)
7678
    src_info = nodeinfo[source_node]
7679
    dst_info = nodeinfo[target_node]
7680

    
7681
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7682
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7683
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7684
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7685
      if src_version != dst_version:
7686
        self.feedback_fn("* warning: hypervisor version mismatch between"
7687
                         " source (%s) and target (%s) node" %
7688
                         (src_version, dst_version))
7689

    
7690
    self.feedback_fn("* checking disk consistency between source and target")
7691
    for dev in instance.disks:
7692
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7693
        raise errors.OpExecError("Disk %s is degraded or not fully"
7694
                                 " synchronized on target node,"
7695
                                 " aborting migration" % dev.iv_name)
7696

    
7697
    # First get the migration information from the remote node
7698
    result = self.rpc.call_migration_info(source_node, instance)
7699
    msg = result.fail_msg
7700
    if msg:
7701
      log_err = ("Failed fetching source migration information from %s: %s" %
7702
                 (source_node, msg))
7703
      logging.error(log_err)
7704
      raise errors.OpExecError(log_err)
7705

    
7706
    self.migration_info = migration_info = result.payload
7707

    
7708
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7709
      # Then switch the disks to master/master mode
7710
      self._EnsureSecondary(target_node)
7711
      self._GoStandalone()
7712
      self._GoReconnect(True)
7713
      self._WaitUntilSync()
7714

    
7715
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7716
    result = self.rpc.call_accept_instance(target_node,
7717
                                           instance,
7718
                                           migration_info,
7719
                                           self.nodes_ip[target_node])
7720

    
7721
    msg = result.fail_msg
7722
    if msg:
7723
      logging.error("Instance pre-migration failed, trying to revert"
7724
                    " disk status: %s", msg)
7725
      self.feedback_fn("Pre-migration failed, aborting")
7726
      self._AbortMigration()
7727
      self._RevertDiskStatus()
7728
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7729
                               (instance.name, msg))
7730

    
7731
    self.feedback_fn("* migrating instance to %s" % target_node)
7732
    result = self.rpc.call_instance_migrate(source_node, instance,
7733
                                            self.nodes_ip[target_node],
7734
                                            self.live)
7735
    msg = result.fail_msg
7736
    if msg:
7737
      logging.error("Instance migration failed, trying to revert"
7738
                    " disk status: %s", msg)
7739
      self.feedback_fn("Migration failed, aborting")
7740
      self._AbortMigration()
7741
      self._RevertDiskStatus()
7742
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7743
                               (instance.name, msg))
7744

    
7745
    self.feedback_fn("* starting memory transfer")
7746
    last_feedback = time.time()
7747
    while True:
7748
      result = self.rpc.call_instance_get_migration_status(source_node,
7749
                                                           instance)
7750
      msg = result.fail_msg
7751
      ms = result.payload   # MigrationStatus instance
7752
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7753
        logging.error("Instance migration failed, trying to revert"
7754
                      " disk status: %s", msg)
7755
        self.feedback_fn("Migration failed, aborting")
7756
        self._AbortMigration()
7757
        self._RevertDiskStatus()
7758
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7759
                                 (instance.name, msg))
7760

    
7761
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7762
        self.feedback_fn("* memory transfer complete")
7763
        break
7764

    
7765
      if (utils.TimeoutExpired(last_feedback,
7766
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7767
          ms.transferred_ram is not None):
7768
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7769
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7770
        last_feedback = time.time()
7771

    
7772
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7773

    
7774
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7775
                                                           instance,
7776
                                                           True,
7777
                                                           self.live)
7778
    msg = result.fail_msg
7779
    if msg:
7780
      logging.error("Instance migration succeeded, but finalization failed"
7781
                    " on the source node: %s", msg)
7782
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7783
                               msg)
7784

    
7785
    instance.primary_node = target_node
7786

    
7787
    # distribute new instance config to the other nodes
7788
    self.cfg.Update(instance, self.feedback_fn)
7789

    
7790
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7791
                                                           instance,
7792
                                                           migration_info,
7793
                                                           True)
7794
    msg = result.fail_msg
7795
    if msg:
7796
      logging.error("Instance migration succeeded, but finalization failed"
7797
                    " on the target node: %s", msg)
7798
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7799
                               msg)
7800

    
7801
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7802
      self._EnsureSecondary(source_node)
7803
      self._WaitUntilSync()
7804
      self._GoStandalone()
7805
      self._GoReconnect(False)
7806
      self._WaitUntilSync()
7807

    
7808
    self.feedback_fn("* done")
7809

    
7810
  def _ExecFailover(self):
7811
    """Failover an instance.
7812

7813
    The failover is done by shutting it down on its present node and
7814
    starting it on the secondary.
7815

7816
    """
7817
    instance = self.instance
7818
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7819

    
7820
    source_node = instance.primary_node
7821
    target_node = self.target_node
7822

    
7823
    if instance.admin_state == constants.ADMINST_UP:
7824
      self.feedback_fn("* checking disk consistency between source and target")
7825
      for dev in instance.disks:
7826
        # for drbd, these are drbd over lvm
7827
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7828
          if primary_node.offline:
7829
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7830
                             " target node %s" %
7831
                             (primary_node.name, dev.iv_name, target_node))
7832
          elif not self.ignore_consistency:
7833
            raise errors.OpExecError("Disk %s is degraded on target node,"
7834
                                     " aborting failover" % dev.iv_name)
7835
    else:
7836
      self.feedback_fn("* not checking disk consistency as instance is not"
7837
                       " running")
7838

    
7839
    self.feedback_fn("* shutting down instance on source node")
7840
    logging.info("Shutting down instance %s on node %s",
7841
                 instance.name, source_node)
7842

    
7843
    result = self.rpc.call_instance_shutdown(source_node, instance,
7844
                                             self.shutdown_timeout)
7845
    msg = result.fail_msg
7846
    if msg:
7847
      if self.ignore_consistency or primary_node.offline:
7848
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7849
                           " proceeding anyway; please make sure node"
7850
                           " %s is down; error details: %s",
7851
                           instance.name, source_node, source_node, msg)
7852
      else:
7853
        raise errors.OpExecError("Could not shutdown instance %s on"
7854
                                 " node %s: %s" %
7855
                                 (instance.name, source_node, msg))
7856

    
7857
    self.feedback_fn("* deactivating the instance's disks on source node")
7858
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7859
      raise errors.OpExecError("Can't shut down the instance's disks")
7860

    
7861
    instance.primary_node = target_node
7862
    # distribute new instance config to the other nodes
7863
    self.cfg.Update(instance, self.feedback_fn)
7864

    
7865
    # Only start the instance if it's marked as up
7866
    if instance.admin_state == constants.ADMINST_UP:
7867
      self.feedback_fn("* activating the instance's disks on target node %s" %
7868
                       target_node)
7869
      logging.info("Starting instance %s on node %s",
7870
                   instance.name, target_node)
7871

    
7872
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7873
                                           ignore_secondaries=True)
7874
      if not disks_ok:
7875
        _ShutdownInstanceDisks(self.lu, instance)
7876
        raise errors.OpExecError("Can't activate the instance's disks")
7877

    
7878
      self.feedback_fn("* starting the instance on the target node %s" %
7879
                       target_node)
7880
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7881
                                            False)
7882
      msg = result.fail_msg
7883
      if msg:
7884
        _ShutdownInstanceDisks(self.lu, instance)
7885
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7886
                                 (instance.name, target_node, msg))
7887

    
7888
  def Exec(self, feedback_fn):
7889
    """Perform the migration.
7890

7891
    """
7892
    self.feedback_fn = feedback_fn
7893
    self.source_node = self.instance.primary_node
7894

    
7895
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7896
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7897
      self.target_node = self.instance.secondary_nodes[0]
7898
      # Otherwise self.target_node has been populated either
7899
      # directly, or through an iallocator.
7900

    
7901
    self.all_nodes = [self.source_node, self.target_node]
7902
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7903
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7904

    
7905
    if self.failover:
7906
      feedback_fn("Failover instance %s" % self.instance.name)
7907
      self._ExecFailover()
7908
    else:
7909
      feedback_fn("Migrating instance %s" % self.instance.name)
7910

    
7911
      if self.cleanup:
7912
        return self._ExecCleanup()
7913
      else:
7914
        return self._ExecMigration()
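
# Illustrative sketch (not part of the original file): before dispatching to
# the failover, cleanup or migration path, TLMigrateInstance.Exec above fills
# in two small helper structures.  For a DRBD instance with primary "node1"
# and secondary "node2" (made-up names and TEST-NET addresses) they would be:
#
#   self.all_nodes = ["node1", "node2"]
#   self.nodes_ip = {"node1": "192.0.2.1",   # secondary_ip of each node
#                    "node2": "192.0.2.2"}
#
# These are the node list and address map passed to the DRBD RPCs
# (_GoStandalone/_GoReconnect) and to call_instance_migrate.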


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() returns True
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


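# Illustrative sketch (not part of the original file): _CreateDisks further
# below drives _CreateBlockDev once per node and per disk, passing the same
# boolean for force_create and force_open ("f_create = node == pnode").  A
# hypothetical per-node call therefore looks like:
#
#   info = _GetInstanceInfoText(instance)
#   f_create = (node == instance.primary_node)
#   _CreateBlockDev(lu, node, instance, disk, f_create, info, f_create)
#
# Devices for which CreateOnSecondary() returns True still get created when
# f_create is False, as described in the docstring above.

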
def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


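# Illustrative sketch (not part of the original file): _GenerateDRBD8Branch
# above returns a three-node disk tree.  For a 1024 MB disk it looks roughly
# like this (volume group, LV names and minors are made up):
#
#   Disk(LD_DRBD8, size=1024,
#        logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#        iv_name="disk/0",
#        children=[
#          Disk(LD_LV, size=1024, logical_id=("xenvg", "<uuid>.disk0_data")),
#          Disk(LD_LV, size=DRBD_META_SIZE,
#               logical_id=("xenvg", "<uuid>.disk0_meta")),
#        ])
#
# The metadata child is always DRBD_META_SIZE MB, which is why the DRBD8
# size computations later in this module add DRBD_META_SIZE per disk.

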
def _GenerateDiskTemplate(lu, template_name,
8024
                          instance_name, primary_node,
8025
                          secondary_nodes, disk_info,
8026
                          file_storage_dir, file_driver,
8027
                          base_index, feedback_fn):
8028
  """Generate the entire disk layout for a given template type.
8029

8030
  """
8031
  #TODO: compute space requirements
8032

    
8033
  vgname = lu.cfg.GetVGName()
8034
  disk_count = len(disk_info)
8035
  disks = []
8036
  if template_name == constants.DT_DISKLESS:
8037
    pass
8038
  elif template_name == constants.DT_PLAIN:
8039
    if len(secondary_nodes) != 0:
8040
      raise errors.ProgrammerError("Wrong template configuration")
8041

    
8042
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8043
                                      for i in range(disk_count)])
8044
    for idx, disk in enumerate(disk_info):
8045
      disk_index = idx + base_index
8046
      vg = disk.get(constants.IDISK_VG, vgname)
8047
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8048
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
8049
                              size=disk[constants.IDISK_SIZE],
8050
                              logical_id=(vg, names[idx]),
8051
                              iv_name="disk/%d" % disk_index,
8052
                              mode=disk[constants.IDISK_MODE])
8053
      disks.append(disk_dev)
8054
  elif template_name == constants.DT_DRBD8:
8055
    if len(secondary_nodes) != 1:
8056
      raise errors.ProgrammerError("Wrong template configuration")
8057
    remote_node = secondary_nodes[0]
8058
    minors = lu.cfg.AllocateDRBDMinor(
8059
      [primary_node, remote_node] * len(disk_info), instance_name)
8060

    
8061
    names = []
8062
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8063
                                               for i in range(disk_count)]):
8064
      names.append(lv_prefix + "_data")
8065
      names.append(lv_prefix + "_meta")
8066
    for idx, disk in enumerate(disk_info):
8067
      disk_index = idx + base_index
8068
      data_vg = disk.get(constants.IDISK_VG, vgname)
8069
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8070
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8071
                                      disk[constants.IDISK_SIZE],
8072
                                      [data_vg, meta_vg],
8073
                                      names[idx * 2:idx * 2 + 2],
8074
                                      "disk/%d" % disk_index,
8075
                                      minors[idx * 2], minors[idx * 2 + 1])
8076
      disk_dev.mode = disk[constants.IDISK_MODE]
8077
      disks.append(disk_dev)
8078
  elif template_name == constants.DT_FILE:
8079
    if len(secondary_nodes) != 0:
8080
      raise errors.ProgrammerError("Wrong template configuration")
8081

    
8082
    opcodes.RequireFileStorage()
8083

    
8084
    for idx, disk in enumerate(disk_info):
8085
      disk_index = idx + base_index
8086
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8087
                              size=disk[constants.IDISK_SIZE],
8088
                              iv_name="disk/%d" % disk_index,
8089
                              logical_id=(file_driver,
8090
                                          "%s/disk%d" % (file_storage_dir,
8091
                                                         disk_index)),
8092
                              mode=disk[constants.IDISK_MODE])
8093
      disks.append(disk_dev)
8094
  elif template_name == constants.DT_SHARED_FILE:
8095
    if len(secondary_nodes) != 0:
8096
      raise errors.ProgrammerError("Wrong template configuration")
8097

    
8098
    opcodes.RequireSharedFileStorage()
8099

    
8100
    for idx, disk in enumerate(disk_info):
8101
      disk_index = idx + base_index
8102
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8103
                              size=disk[constants.IDISK_SIZE],
8104
                              iv_name="disk/%d" % disk_index,
8105
                              logical_id=(file_driver,
8106
                                          "%s/disk%d" % (file_storage_dir,
8107
                                                         disk_index)),
8108
                              mode=disk[constants.IDISK_MODE])
8109
      disks.append(disk_dev)
8110
  elif template_name == constants.DT_BLOCK:
8111
    if len(secondary_nodes) != 0:
8112
      raise errors.ProgrammerError("Wrong template configuration")
8113

    
8114
    for idx, disk in enumerate(disk_info):
8115
      disk_index = idx + base_index
8116
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8117
                              size=disk[constants.IDISK_SIZE],
8118
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8119
                                          disk[constants.IDISK_ADOPT]),
8120
                              iv_name="disk/%d" % disk_index,
8121
                              mode=disk[constants.IDISK_MODE])
8122
      disks.append(disk_dev)
8123

    
8124
  else:
8125
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8126
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time


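# Worked example (not part of the original file) for _CalcEta above: if 256 MB
# out of 1024 MB have been written in 30 seconds, the average is 30 / 256
# (about 0.117 s/MB), so the remaining 768 MB give an ETA of
# 768 * 30 / 256 = 90 seconds.  _WipeDisks below calls it with the elapsed
# time, the current offset and the device size, so the units stay consistent.

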
def _WipeDisks(lu, instance):
8150
  """Wipes instance disks.
8151

8152
  @type lu: L{LogicalUnit}
8153
  @param lu: the logical unit on whose behalf we execute
8154
  @type instance: L{objects.Instance}
8155
  @param instance: the instance whose disks we should create
8156
  @return: the success of the wipe
8157

8158
  """
8159
  node = instance.primary_node
8160

    
8161
  for device in instance.disks:
8162
    lu.cfg.SetDiskID(device, node)
8163

    
8164
  logging.info("Pause sync of instance %s disks", instance.name)
8165
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8166

    
8167
  for idx, success in enumerate(result.payload):
8168
    if not success:
8169
      logging.warn("pause-sync of instance %s for disks %d failed",
8170
                   instance.name, idx)
8171

    
8172
  try:
8173
    for idx, device in enumerate(instance.disks):
8174
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8175
      # MAX_WIPE_CHUNK at max
8176
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8177
                            constants.MIN_WIPE_CHUNK_PERCENT)
8178
      # we _must_ make this an int, otherwise rounding errors will
8179
      # occur
8180
      wipe_chunk_size = int(wipe_chunk_size)
8181

    
8182
      lu.LogInfo("* Wiping disk %d", idx)
8183
      logging.info("Wiping disk %d for instance %s, node %s using"
8184
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8185

    
8186
      offset = 0
8187
      size = device.size
8188
      last_output = 0
8189
      start_time = time.time()
8190

    
8191
      while offset < size:
8192
        wipe_size = min(wipe_chunk_size, size - offset)
8193
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8194
                      idx, offset, wipe_size)
8195
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8196
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8197
                     (idx, offset, wipe_size))
8198
        now = time.time()
8199
        offset += wipe_size
8200
        if now - last_output >= 60:
8201
          eta = _CalcEta(now - start_time, offset, size)
8202
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8203
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8204
          last_output = now
8205
  finally:
8206
    logging.info("Resume sync of instance %s disks", instance.name)
8207

    
8208
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8209

    
8210
    for idx, success in enumerate(result.payload):
8211
      if not success:
8212
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8213
                      " look at the status and troubleshoot the issue", idx)
8214
        logging.warn("resume-sync of instance %s for disks %d failed",
8215
                     instance.name, idx)


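# Worked example (not part of the original file) for the chunk size used by
# _WipeDisks above: each chunk is MIN_WIPE_CHUNK_PERCENT of the disk, capped
# at MAX_WIPE_CHUNK.  Assuming the usual values of 10 (percent) and 1024 (MB)
# for those constants (an assumption; check constants.py), a 5000 MB disk is
# wiped in chunks of min(1024, 5000 / 100.0 * 10) = 500 MB, while a 50000 MB
# disk is capped at 1024 MB per call_blockdev_wipe RPC.

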
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8219
  """Create all disks for an instance.
8220

8221
  This abstracts away some work from AddInstance.
8222

8223
  @type lu: L{LogicalUnit}
8224
  @param lu: the logical unit on whose behalf we execute
8225
  @type instance: L{objects.Instance}
8226
  @param instance: the instance whose disks we should create
8227
  @type to_skip: list
8228
  @param to_skip: list of indices to skip
8229
  @type target_node: string
8230
  @param target_node: if passed, overrides the target node for creation
8231
  @rtype: boolean
8232
  @return: the success of the creation
8233

8234
  """
8235
  info = _GetInstanceInfoText(instance)
8236
  if target_node is None:
8237
    pnode = instance.primary_node
8238
    all_nodes = instance.all_nodes
8239
  else:
8240
    pnode = target_node
8241
    all_nodes = [pnode]
8242

    
8243
  if instance.disk_template in constants.DTS_FILEBASED:
8244
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8245
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8246

    
8247
    result.Raise("Failed to create directory '%s' on"
8248
                 " node %s" % (file_storage_dir, pnode))
8249

    
8250
  # Note: this needs to be kept in sync with adding of disks in
8251
  # LUInstanceSetParams
8252
  for idx, device in enumerate(instance.disks):
8253
    if to_skip and idx in to_skip:
8254
      continue
8255
    logging.info("Creating volume %s for instance %s",
8256
                 device.iv_name, instance.name)
8257
    #HARDCODE
8258
    for node in all_nodes:
8259
      f_create = node == pnode
8260
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8261

    
8262

    
8263
def _RemoveDisks(lu, instance, target_node=None):
8264
  """Remove all disks for an instance.
8265

8266
  This abstracts away some work from `AddInstance()` and
8267
  `RemoveInstance()`. Note that in case some of the devices couldn't
8268
  be removed, the removal will continue with the other ones (compare
8269
  with `_CreateDisks()`).
8270

8271
  @type lu: L{LogicalUnit}
8272
  @param lu: the logical unit on whose behalf we execute
8273
  @type instance: L{objects.Instance}
8274
  @param instance: the instance whose disks we should remove
8275
  @type target_node: string
8276
  @param target_node: used to override the node on which to remove the disks
8277
  @rtype: boolean
8278
  @return: the success of the removal
8279

8280
  """
8281
  logging.info("Removing block devices for instance %s", instance.name)
8282

    
8283
  all_result = True
8284
  for device in instance.disks:
8285
    if target_node:
8286
      edata = [(target_node, device)]
8287
    else:
8288
      edata = device.ComputeNodeTree(instance.primary_node)
8289
    for node, disk in edata:
8290
      lu.cfg.SetDiskID(disk, node)
8291
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
8292
      if msg:
8293
        lu.LogWarning("Could not remove block device %s on node %s,"
8294
                      " continuing anyway: %s", device.iv_name, node, msg)
8295
        all_result = False
8296

    
8297
    # if this is a DRBD disk, return its port to the pool
8298
    if device.dev_type in constants.LDS_DRBD:
8299
      tcp_port = device.logical_id[2]
8300
      lu.cfg.AddTcpUdpPort(tcp_port)
8301

    
8302
  if instance.disk_template == constants.DT_FILE:
8303
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8304
    if target_node:
8305
      tgt = target_node
8306
    else:
8307
      tgt = instance.primary_node
8308
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
8309
    if result.fail_msg:
8310
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
8311
                    file_storage_dir, instance.primary_node, result.fail_msg)
8312
      all_result = False
8313

    
8314
  return all_result
8315

    
8316

    
8317
def _ComputeDiskSizePerVG(disk_template, disks):
8318
  """Compute disk size requirements in the volume group
8319

8320
  """
8321
  def _compute(disks, payload):
8322
    """Universal algorithm.
8323

8324
    """
8325
    vgs = {}
8326
    for disk in disks:
8327
      vgs[disk[constants.IDISK_VG]] = \
8328
        vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
8329

    
8330
    return vgs
8331

    
8332
  # Required free disk space as a function of disk and swap space
8333
  req_size_dict = {
8334
    constants.DT_DISKLESS: {},
8335
    constants.DT_PLAIN: _compute(disks, 0),
8336
    # 128 MB are added for drbd metadata for each disk
8337
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
8338
    constants.DT_FILE: {},
8339
    constants.DT_SHARED_FILE: {},
8340
  }
8341

    
8342
  if disk_template not in req_size_dict:
8343
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8344
                                 " is unknown" % disk_template)
8345

    
8346
  return req_size_dict[disk_template]
8347

    
8348

    
8349
def _ComputeDiskSize(disk_template, disks):
8350
  """Compute disk size requirements in the volume group
8351

8352
  """
8353
  # Required free disk space as a function of disk and swap space
8354
  req_size_dict = {
8355
    constants.DT_DISKLESS: None,
8356
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
8357
    # 128 MB are added for drbd metadata for each disk
8358
    constants.DT_DRBD8:
8359
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
8360
    constants.DT_FILE: None,
8361
    constants.DT_SHARED_FILE: 0,
8362
    constants.DT_BLOCK: 0,
8363
  }
8364

    
8365
  if disk_template not in req_size_dict:
8366
    raise errors.ProgrammerError("Disk template '%s' size requirement"
8367
                                 " is unknown" % disk_template)
8368

    
8369
  return req_size_dict[disk_template]
8370

    
8371

    
8372
def _FilterVmNodes(lu, nodenames):
8373
  """Filters out non-vm_capable nodes from a list.
8374

8375
  @type lu: L{LogicalUnit}
8376
  @param lu: the logical unit for which we check
8377
  @type nodenames: list
8378
  @param nodenames: the list of nodes on which we should check
8379
  @rtype: list
8380
  @return: the list of vm-capable nodes
8381

8382
  """
8383
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8384
  return [name for name in nodenames if name not in vm_nodes]
8385

    
8386

    
8387
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8388
  """Hypervisor parameter validation.
8389

8390
  This function abstract the hypervisor parameter validation to be
8391
  used in both instance create and instance modify.
8392

8393
  @type lu: L{LogicalUnit}
8394
  @param lu: the logical unit for which we check
8395
  @type nodenames: list
8396
  @param nodenames: the list of nodes on which we should check
8397
  @type hvname: string
8398
  @param hvname: the name of the hypervisor we should use
8399
  @type hvparams: dict
8400
  @param hvparams: the parameters which we need to check
8401
  @raise errors.OpPrereqError: if the parameters are not valid
8402

8403
  """
8404
  nodenames = _FilterVmNodes(lu, nodenames)
8405

    
8406
  cluster = lu.cfg.GetClusterInfo()
8407
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8408

    
8409
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8410
  for node in nodenames:
8411
    info = hvinfo[node]
8412
    if info.offline:
8413
      continue
8414
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8415

    
8416

    
8417
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8418
  """OS parameters validation.
8419

8420
  @type lu: L{LogicalUnit}
8421
  @param lu: the logical unit for which we check
8422
  @type required: boolean
8423
  @param required: whether the validation should fail if the OS is not
8424
      found
8425
  @type nodenames: list
8426
  @param nodenames: the list of nodes on which we should check
8427
  @type osname: string
8428
  @param osname: the name of the hypervisor we should use
8429
  @type osparams: dict
8430
  @param osparams: the parameters which we need to check
8431
  @raise errors.OpPrereqError: if the parameters are not valid
8432

8433
  """
8434
  nodenames = _FilterVmNodes(lu, nodenames)
8435
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8436
                                   [constants.OS_VALIDATE_PARAMETERS],
8437
                                   osparams)
8438
  for node, nres in result.items():
8439
    # we don't check for offline cases since this should be run only
8440
    # against the master node and/or an instance's nodes
8441
    nres.Raise("OS Parameters validation failed on node %s" % node)
8442
    if not nres.payload:
8443
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8444
                 osname, node)
8445

    
8446

    
8447
class LUInstanceCreate(LogicalUnit):
8448
  """Create an instance.
8449

8450
  """
8451
  HPATH = "instance-add"
8452
  HTYPE = constants.HTYPE_INSTANCE
8453
  REQ_BGL = False
8454

    
8455
  def CheckArguments(self):
8456
    """Check arguments.
8457

8458
    """
8459
    # do not require name_check to ease forward/backward compatibility
8460
    # for tools
8461
    if self.op.no_install and self.op.start:
8462
      self.LogInfo("No-installation mode selected, disabling startup")
8463
      self.op.start = False
8464
    # validate/normalize the instance name
8465
    self.op.instance_name = \
8466
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8467

    
8468
    if self.op.ip_check and not self.op.name_check:
8469
      # TODO: make the ip check more flexible and not depend on the name check
8470
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8471
                                 " check", errors.ECODE_INVAL)
8472

    
8473
    # check nics' parameter names
8474
    for nic in self.op.nics:
8475
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8476

    
8477
    # check disks. parameter names and consistent adopt/no-adopt strategy
8478
    has_adopt = has_no_adopt = False
8479
    for disk in self.op.disks:
8480
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8481
      if constants.IDISK_ADOPT in disk:
8482
        has_adopt = True
8483
      else:
8484
        has_no_adopt = True
8485
    if has_adopt and has_no_adopt:
8486
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8487
                                 errors.ECODE_INVAL)
8488
    if has_adopt:
8489
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8490
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8491
                                   " '%s' disk template" %
8492
                                   self.op.disk_template,
8493
                                   errors.ECODE_INVAL)
8494
      if self.op.iallocator is not None:
8495
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8496
                                   " iallocator script", errors.ECODE_INVAL)
8497
      if self.op.mode == constants.INSTANCE_IMPORT:
8498
        raise errors.OpPrereqError("Disk adoption not allowed for"
8499
                                   " instance import", errors.ECODE_INVAL)
8500
    else:
8501
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8502
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8503
                                   " but no 'adopt' parameter given" %
8504
                                   self.op.disk_template,
8505
                                   errors.ECODE_INVAL)
8506

    
8507
    self.adopt_disks = has_adopt
8508

    
8509
    # instance name verification
8510
    if self.op.name_check:
8511
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8512
      self.op.instance_name = self.hostname1.name
8513
      # used in CheckPrereq for ip ping check
8514
      self.check_ip = self.hostname1.ip
8515
    else:
8516
      self.check_ip = None
8517

    
8518
    # file storage checks
8519
    if (self.op.file_driver and
8520
        not self.op.file_driver in constants.FILE_DRIVER):
8521
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8522
                                 self.op.file_driver, errors.ECODE_INVAL)
8523

    
8524
    if self.op.disk_template == constants.DT_FILE:
8525
      opcodes.RequireFileStorage()
8526
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8527
      opcodes.RequireSharedFileStorage()
8528

    
8529
    ### Node/iallocator related checks
8530
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8531

    
8532
    if self.op.pnode is not None:
8533
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8534
        if self.op.snode is None:
8535
          raise errors.OpPrereqError("The networked disk templates need"
8536
                                     " a mirror node", errors.ECODE_INVAL)
8537
      elif self.op.snode:
8538
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8539
                        " template")
8540
        self.op.snode = None
8541

    
8542
    self._cds = _GetClusterDomainSecret()
8543

    
8544
    if self.op.mode == constants.INSTANCE_IMPORT:
8545
      # On import force_variant must be True, because if we forced it at
8546
      # initial install, our only chance when importing it back is that it
8547
      # works again!
8548
      self.op.force_variant = True
8549

    
8550
      if self.op.no_install:
8551
        self.LogInfo("No-installation mode has no effect during import")
8552

    
8553
    elif self.op.mode == constants.INSTANCE_CREATE:
8554
      if self.op.os_type is None:
8555
        raise errors.OpPrereqError("No guest OS specified",
8556
                                   errors.ECODE_INVAL)
8557
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8558
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8559
                                   " installation" % self.op.os_type,
8560
                                   errors.ECODE_STATE)
8561
      if self.op.disk_template is None:
8562
        raise errors.OpPrereqError("No disk template specified",
8563
                                   errors.ECODE_INVAL)
8564

    
8565
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8566
      # Check handshake to ensure both clusters have the same domain secret
8567
      src_handshake = self.op.source_handshake
8568
      if not src_handshake:
8569
        raise errors.OpPrereqError("Missing source handshake",
8570
                                   errors.ECODE_INVAL)
8571

    
8572
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8573
                                                           src_handshake)
8574
      if errmsg:
8575
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8576
                                   errors.ECODE_INVAL)
8577

    
8578
      # Load and check source CA
8579
      self.source_x509_ca_pem = self.op.source_x509_ca
8580
      if not self.source_x509_ca_pem:
8581
        raise errors.OpPrereqError("Missing source X509 CA",
8582
                                   errors.ECODE_INVAL)
8583

    
8584
      try:
8585
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8586
                                                    self._cds)
8587
      except OpenSSL.crypto.Error, err:
8588
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8589
                                   (err, ), errors.ECODE_INVAL)
8590

    
8591
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8592
      if errcode is not None:
8593
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8594
                                   errors.ECODE_INVAL)
8595

    
8596
      self.source_x509_ca = cert
8597

    
8598
      src_instance_name = self.op.source_instance_name
8599
      if not src_instance_name:
8600
        raise errors.OpPrereqError("Missing source instance name",
8601
                                   errors.ECODE_INVAL)
8602

    
8603
      self.source_instance_name = \
8604
          netutils.GetHostname(name=src_instance_name).name
8605

    
8606
    else:
8607
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8608
                                 self.op.mode, errors.ECODE_INVAL)
8609

    
8610
  def ExpandNames(self):
8611
    """ExpandNames for CreateInstance.
8612

8613
    Figure out the right locks for instance creation.
8614

8615
    """
8616
    self.needed_locks = {}
8617

    
8618
    instance_name = self.op.instance_name
8619
    # this is just a preventive check, but someone might still add this
8620
    # instance in the meantime, and creation will fail at lock-add time
8621
    if instance_name in self.cfg.GetInstanceList():
8622
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8623
                                 instance_name, errors.ECODE_EXISTS)
8624

    
8625
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8626

    
8627
    if self.op.iallocator:
8628
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8629
      # specifying a group on instance creation and then selecting nodes from
8630
      # that group
8631
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8632
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8633
    else:
8634
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8635
      nodelist = [self.op.pnode]
8636
      if self.op.snode is not None:
8637
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8638
        nodelist.append(self.op.snode)
8639
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8640
      # Lock resources of instance's primary and secondary nodes (copy to
8641
      # prevent accidential modification)
8642
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8643

    
8644
    # in case of import lock the source node too
8645
    if self.op.mode == constants.INSTANCE_IMPORT:
8646
      src_node = self.op.src_node
8647
      src_path = self.op.src_path
8648

    
8649
      if src_path is None:
8650
        self.op.src_path = src_path = self.op.instance_name
8651

    
8652
      if src_node is None:
8653
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8654
        self.op.src_node = None
8655
        if os.path.isabs(src_path):
8656
          raise errors.OpPrereqError("Importing an instance from a path"
8657
                                     " requires a source node option",
8658
                                     errors.ECODE_INVAL)
8659
      else:
8660
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8661
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8662
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8663
        if not os.path.isabs(src_path):
8664
          self.op.src_path = src_path = \
8665
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8666

    
8667
  def _RunAllocator(self):
8668
    """Run the allocator based on input opcode.
8669

8670
    """
8671
    nics = [n.ToDict() for n in self.nics]
8672
    ial = IAllocator(self.cfg, self.rpc,
8673
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8674
                     name=self.op.instance_name,
8675
                     disk_template=self.op.disk_template,
8676
                     tags=self.op.tags,
8677
                     os=self.op.os_type,
8678
                     vcpus=self.be_full[constants.BE_VCPUS],
8679
                     memory=self.be_full[constants.BE_MEMORY],
8680
                     disks=self.disks,
8681
                     nics=nics,
8682
                     hypervisor=self.op.hypervisor,
8683
                     )
8684

    
8685
    ial.Run(self.op.iallocator)
8686

    
8687
    if not ial.success:
8688
      raise errors.OpPrereqError("Can't compute nodes using"
8689
                                 " iallocator '%s': %s" %
8690
                                 (self.op.iallocator, ial.info),
8691
                                 errors.ECODE_NORES)
8692
    if len(ial.result) != ial.required_nodes:
8693
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8694
                                 " of nodes (%s), required %s" %
8695
                                 (self.op.iallocator, len(ial.result),
8696
                                  ial.required_nodes), errors.ECODE_FAULT)
8697
    self.op.pnode = ial.result[0]
8698
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8699
                 self.op.instance_name, self.op.iallocator,
8700
                 utils.CommaJoin(ial.result))
8701
    if ial.required_nodes == 2:
8702
      self.op.snode = ial.result[1]
8703

    
8704
  def BuildHooksEnv(self):
8705
    """Build hooks env.
8706

8707
    This runs on master, primary and secondary nodes of the instance.
8708

8709
    """
8710
    env = {
8711
      "ADD_MODE": self.op.mode,
8712
      }
8713
    if self.op.mode == constants.INSTANCE_IMPORT:
8714
      env["SRC_NODE"] = self.op.src_node
8715
      env["SRC_PATH"] = self.op.src_path
8716
      env["SRC_IMAGES"] = self.src_images
8717

    
8718
    env.update(_BuildInstanceHookEnv(
8719
      name=self.op.instance_name,
8720
      primary_node=self.op.pnode,
8721
      secondary_nodes=self.secondaries,
8722
      status=self.op.start,
8723
      os_type=self.op.os_type,
8724
      minmem=self.be_full[constants.BE_MINMEM],
8725
      maxmem=self.be_full[constants.BE_MAXMEM],
8726
      vcpus=self.be_full[constants.BE_VCPUS],
8727
      nics=_NICListToTuple(self, self.nics),
8728
      disk_template=self.op.disk_template,
8729
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8730
             for d in self.disks],
8731
      bep=self.be_full,
8732
      hvp=self.hv_full,
8733
      hypervisor_name=self.op.hypervisor,
8734
      tags=self.op.tags,
8735
    ))
8736

    
8737
    return env
8738

    
8739
  def BuildHooksNodes(self):
8740
    """Build hooks nodes.
8741

8742
    """
8743
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8744
    return nl, nl
8745

    
8746
  def _ReadExportInfo(self):
8747
    """Reads the export information from disk.
8748

8749
    It will override the opcode source node and path with the actual
8750
    information, if these two were not specified before.
8751

8752
    @return: the export information
8753

8754
    """
8755
    assert self.op.mode == constants.INSTANCE_IMPORT
8756

    
8757
    src_node = self.op.src_node
8758
    src_path = self.op.src_path
8759

    
8760
    if src_node is None:
8761
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8762
      exp_list = self.rpc.call_export_list(locked_nodes)
8763
      found = False
8764
      for node in exp_list:
8765
        if exp_list[node].fail_msg:
8766
          continue
8767
        if src_path in exp_list[node].payload:
8768
          found = True
8769
          self.op.src_node = src_node = node
8770
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8771
                                                       src_path)
8772
          break
8773
      if not found:
8774
        raise errors.OpPrereqError("No export found for relative path %s" %
8775
                                    src_path, errors.ECODE_INVAL)
8776

    
8777
    _CheckNodeOnline(self, src_node)
8778
    result = self.rpc.call_export_info(src_node, src_path)
8779
    result.Raise("No export or invalid export found in dir %s" % src_path)
8780

    
8781
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8782
    if not export_info.has_section(constants.INISECT_EXP):
8783
      raise errors.ProgrammerError("Corrupted export config",
8784
                                   errors.ECODE_ENVIRON)
8785

    
8786
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8787
    if (int(ei_version) != constants.EXPORT_VERSION):
8788
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8789
                                 (ei_version, constants.EXPORT_VERSION),
8790
                                 errors.ECODE_ENVIRON)
8791
    return export_info
8792

    
8793
  def _ReadExportParams(self, einfo):
8794
    """Use export parameters as defaults.
8795

8796
    In case the opcode doesn't specify (as in override) some instance
8797
    parameters, then try to use them from the export information, if
8798
    that declares them.
8799

8800
    """
8801
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8802

    
8803
    if self.op.disk_template is None:
8804
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8805
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8806
                                          "disk_template")
8807
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8808
          raise errors.OpPrereqError("Disk template specified in configuration"
8809
                                     " file is not one of the allowed values:"
8810
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8811
      else:
8812
        raise errors.OpPrereqError("No disk template specified and the export"
8813
                                   " is missing the disk_template information",
8814
                                   errors.ECODE_INVAL)
8815

    
8816
    if not self.op.disks:
8817
      disks = []
8818
      # TODO: import the disk iv_name too
8819
      for idx in range(constants.MAX_DISKS):
8820
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8821
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8822
          disks.append({constants.IDISK_SIZE: disk_sz})
8823
      self.op.disks = disks
8824
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8825
        raise errors.OpPrereqError("No disk info specified and the export"
8826
                                   " is missing the disk information",
8827
                                   errors.ECODE_INVAL)
8828

    
8829
    if not self.op.nics:
8830
      nics = []
8831
      for idx in range(constants.MAX_NICS):
8832
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8833
          ndict = {}
8834
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8835
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8836
            ndict[name] = v
8837
          nics.append(ndict)
8838
        else:
8839
          break
8840
      self.op.nics = nics
8841

    
8842
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8843
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8844

    
8845
    if (self.op.hypervisor is None and
8846
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8847
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8848

    
8849
    if einfo.has_section(constants.INISECT_HYP):
8850
      # use the export parameters but do not override the ones
8851
      # specified by the user
8852
      for name, value in einfo.items(constants.INISECT_HYP):
8853
        if name not in self.op.hvparams:
8854
          self.op.hvparams[name] = value
8855

    
8856
    if einfo.has_section(constants.INISECT_BEP):
8857
      # use the parameters, without overriding
8858
      for name, value in einfo.items(constants.INISECT_BEP):
8859
        if name not in self.op.beparams:
8860
          self.op.beparams[name] = value
8861
        # Compatibility for the old "memory" be param
8862
        if name == constants.BE_MEMORY:
8863
          if constants.BE_MAXMEM not in self.op.beparams:
8864
            self.op.beparams[constants.BE_MAXMEM] = value
8865
          if constants.BE_MINMEM not in self.op.beparams:
8866
            self.op.beparams[constants.BE_MINMEM] = value
8867
    else:
8868
      # try to read the parameters old style, from the main section
8869
      for name in constants.BES_PARAMETERS:
8870
        if (name not in self.op.beparams and
8871
            einfo.has_option(constants.INISECT_INS, name)):
8872
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8873

    
8874
    if einfo.has_section(constants.INISECT_OSP):
8875
      # use the parameters, without overriding
8876
      for name, value in einfo.items(constants.INISECT_OSP):
8877
        if name not in self.op.osparams:
8878
          self.op.osparams[name] = value
8879

    
8880
  def _RevertToDefaults(self, cluster):
8881
    """Revert the instance parameters to the default values.
8882

8883
    """
8884
    # hvparams
8885
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8886
    for name in self.op.hvparams.keys():
8887
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8888
        del self.op.hvparams[name]
8889
    # beparams
8890
    be_defs = cluster.SimpleFillBE({})
8891
    for name in self.op.beparams.keys():
8892
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8893
        del self.op.beparams[name]
8894
    # nic params
8895
    nic_defs = cluster.SimpleFillNIC({})
8896
    for nic in self.op.nics:
8897
      for name in constants.NICS_PARAMETERS:
8898
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8899
          del nic[name]
8900
    # osparams
8901
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8902
    for name in self.op.osparams.keys():
8903
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8904
        del self.op.osparams[name]
8905

    
8906
  def _CalculateFileStorageDir(self):
8907
    """Calculate final instance file storage dir.
8908

8909
    """
8910
    # file storage dir calculation/check
8911
    self.instance_file_storage_dir = None
8912
    if self.op.disk_template in constants.DTS_FILEBASED:
8913
      # build the full file storage dir path
8914
      joinargs = []
8915

    
8916
      if self.op.disk_template == constants.DT_SHARED_FILE:
8917
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8918
      else:
8919
        get_fsd_fn = self.cfg.GetFileStorageDir
8920

    
8921
      cfg_storagedir = get_fsd_fn()
8922
      if not cfg_storagedir:
8923
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8924
      joinargs.append(cfg_storagedir)
8925

    
8926
      if self.op.file_storage_dir is not None:
8927
        joinargs.append(self.op.file_storage_dir)
8928

    
8929
      joinargs.append(self.op.instance_name)
8930

    
8931
      # pylint: disable=W0142
8932
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8933

    
8934
  def CheckPrereq(self):
8935
    """Check prerequisites.
8936

8937
    """
8938
    self._CalculateFileStorageDir()
8939

    
8940
    if self.op.mode == constants.INSTANCE_IMPORT:
8941
      export_info = self._ReadExportInfo()
8942
      self._ReadExportParams(export_info)
8943

    
8944
    if (not self.cfg.GetVGName() and
8945
        self.op.disk_template not in constants.DTS_NOT_LVM):
8946
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8947
                                 " instances", errors.ECODE_STATE)
8948

    
8949
    if (self.op.hypervisor is None or
8950
        self.op.hypervisor == constants.VALUE_AUTO):
8951
      self.op.hypervisor = self.cfg.GetHypervisorType()
8952

    
8953
    cluster = self.cfg.GetClusterInfo()
8954
    enabled_hvs = cluster.enabled_hypervisors
8955
    if self.op.hypervisor not in enabled_hvs:
8956
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8957
                                 " cluster (%s)" % (self.op.hypervisor,
8958
                                  ",".join(enabled_hvs)),
8959
                                 errors.ECODE_STATE)
8960

    
8961
    # Check tag validity
8962
    for tag in self.op.tags:
8963
      objects.TaggableObject.ValidateTag(tag)
8964

    
8965
    # check hypervisor parameter syntax (locally)
8966
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8967
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8968
                                      self.op.hvparams)
8969
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8970
    hv_type.CheckParameterSyntax(filled_hvp)
8971
    self.hv_full = filled_hvp
8972
    # check that we don't specify global parameters on an instance
8973
    _CheckGlobalHvParams(self.op.hvparams)
8974

    
8975
    # fill and remember the beparams dict
8976
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8977
    for param, value in self.op.beparams.iteritems():
8978
      if value == constants.VALUE_AUTO:
8979
        self.op.beparams[param] = default_beparams[param]
8980
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8981
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8982

    
8983
    # build os parameters
8984
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8985

    
8986
    # now that hvp/bep are in final format, let's reset to defaults,
8987
    # if told to do so
8988
    if self.op.identify_defaults:
8989
      self._RevertToDefaults(cluster)
8990

    
8991
    # NIC buildup
8992
    self.nics = []
8993
    for idx, nic in enumerate(self.op.nics):
8994
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8995
      nic_mode = nic_mode_req
8996
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8997
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8998

    
8999
      # in routed mode, for the first nic, the default ip is 'auto'
9000
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9001
        default_ip_mode = constants.VALUE_AUTO
9002
      else:
9003
        default_ip_mode = constants.VALUE_NONE
9004

    
9005
      # ip validity checks
9006
      ip = nic.get(constants.INIC_IP, default_ip_mode)
9007
      if ip is None or ip.lower() == constants.VALUE_NONE:
9008
        nic_ip = None
9009
      elif ip.lower() == constants.VALUE_AUTO:
9010
        if not self.op.name_check:
9011
          raise errors.OpPrereqError("IP address set to auto but name checks"
9012
                                     " have been skipped",
9013
                                     errors.ECODE_INVAL)
9014
        nic_ip = self.hostname1.ip
9015
      else:
9016
        if not netutils.IPAddress.IsValid(ip):
9017
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9018
                                     errors.ECODE_INVAL)
9019
        nic_ip = ip
9020

    
9021
      # TODO: check the ip address for uniqueness
9022
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9023
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
9024
                                   errors.ECODE_INVAL)
9025

    
9026
      # MAC address verification
9027
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9028
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9029
        mac = utils.NormalizeAndValidateMac(mac)
9030

    
9031
        try:
9032
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9033
        except errors.ReservationError:
9034
          raise errors.OpPrereqError("MAC address %s already in use"
9035
                                     " in cluster" % mac,
9036
                                     errors.ECODE_NOTUNIQUE)
9037

    
9038
      #  Build nic parameters
9039
      link = nic.get(constants.INIC_LINK, None)
9040
      if link == constants.VALUE_AUTO:
9041
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9042
      nicparams = {}
9043
      if nic_mode_req:
9044
        nicparams[constants.NIC_MODE] = nic_mode
9045
      if link:
9046
        nicparams[constants.NIC_LINK] = link
9047

    
9048
      check_params = cluster.SimpleFillNIC(nicparams)
9049
      objects.NIC.CheckParameterSyntax(check_params)
9050
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9051

    
9052
    # disk checks/pre-build
9053
    default_vg = self.cfg.GetVGName()
9054
    self.disks = []
9055
    for disk in self.op.disks:
9056
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9057
      if mode not in constants.DISK_ACCESS_SET:
9058
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9059
                                   mode, errors.ECODE_INVAL)
9060
      size = disk.get(constants.IDISK_SIZE, None)
9061
      if size is None:
9062
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9063
      try:
9064
        size = int(size)
9065
      except (TypeError, ValueError):
9066
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9067
                                   errors.ECODE_INVAL)
9068

    
9069
      data_vg = disk.get(constants.IDISK_VG, default_vg)
9070
      new_disk = {
9071
        constants.IDISK_SIZE: size,
9072
        constants.IDISK_MODE: mode,
9073
        constants.IDISK_VG: data_vg,
9074
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9075
        }
9076
      if constants.IDISK_ADOPT in disk:
9077
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9078
      self.disks.append(new_disk)
9079

    
9080
    if self.op.mode == constants.INSTANCE_IMPORT:
9081
      disk_images = []
9082
      for idx in range(len(self.disks)):
9083
        option = "disk%d_dump" % idx
9084
        if export_info.has_option(constants.INISECT_INS, option):
9085
          # FIXME: are the old os-es, disk sizes, etc. useful?
9086
          export_name = export_info.get(constants.INISECT_INS, option)
9087
          image = utils.PathJoin(self.op.src_path, export_name)
9088
          disk_images.append(image)
9089
        else:
9090
          disk_images.append(False)
9091

    
9092
      self.src_images = disk_images
9093

    
9094
      old_name = export_info.get(constants.INISECT_INS, "name")
9095
      if self.op.instance_name == old_name:
9096
        for idx, nic in enumerate(self.nics):
9097
          if nic.mac == constants.VALUE_AUTO:
9098
            nic_mac_ini = "nic%d_mac" % idx
9099
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9100

    
9101
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9102

    
9103
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
9104
    if self.op.ip_check:
9105
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9106
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
9107
                                   (self.check_ip, self.op.instance_name),
9108
                                   errors.ECODE_NOTUNIQUE)
9109

    
9110
    #### mac address generation
9111
    # By generating here the mac address both the allocator and the hooks get
9112
    # the real final mac address rather than the 'auto' or 'generate' value.
9113
    # There is a race condition between the generation and the instance object
9114
    # creation, which means that we know the mac is valid now, but we're not
9115
    # sure it will be when we actually add the instance. If things go bad
9116
    # adding the instance will abort because of a duplicate mac, and the
9117
    # creation job will fail.
9118
    for nic in self.nics:
9119
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9120
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9121

    
9122
    #### allocator run
9123

    
9124
    if self.op.iallocator is not None:
9125
      self._RunAllocator()
9126

    
9127
    # Release all unneeded node locks
9128
    _ReleaseLocks(self, locking.LEVEL_NODE,
9129
                  keep=filter(None, [self.op.pnode, self.op.snode,
9130
                                     self.op.src_node]))
9131

    
9132
    #### node related checks
9133

    
9134
    # check primary node
9135
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9136
    assert self.pnode is not None, \
9137
      "Cannot retrieve locked node %s" % self.op.pnode
9138
    if pnode.offline:
9139
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9140
                                 pnode.name, errors.ECODE_STATE)
9141
    if pnode.drained:
9142
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9143
                                 pnode.name, errors.ECODE_STATE)
9144
    if not pnode.vm_capable:
9145
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9146
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9147

    
9148
    self.secondaries = []
9149

    
9150
    # mirror node verification
9151
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9152
      if self.op.snode == pnode.name:
9153
        raise errors.OpPrereqError("The secondary node cannot be the"
9154
                                   " primary node", errors.ECODE_INVAL)
9155
      _CheckNodeOnline(self, self.op.snode)
9156
      _CheckNodeNotDrained(self, self.op.snode)
9157
      _CheckNodeVmCapable(self, self.op.snode)
9158
      self.secondaries.append(self.op.snode)
9159

    
9160
    nodenames = [pnode.name] + self.secondaries
9161

    
9162
    if not self.adopt_disks:
9163
      # Check lv size requirements, if not adopting
9164
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9165
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9166

    
9167
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9168
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9169
                                disk[constants.IDISK_ADOPT])
9170
                     for disk in self.disks])
9171
      if len(all_lvs) != len(self.disks):
9172
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9173
                                   errors.ECODE_INVAL)
9174
      for lv_name in all_lvs:
9175
        try:
9176
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9177
          # to ReserveLV uses the same syntax
9178
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9179
        except errors.ReservationError:
9180
          raise errors.OpPrereqError("LV named %s used by another instance" %
9181
                                     lv_name, errors.ECODE_NOTUNIQUE)
9182

    
9183
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9184
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9185

    
9186
      node_lvs = self.rpc.call_lv_list([pnode.name],
9187
                                       vg_names.payload.keys())[pnode.name]
9188
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9189
      node_lvs = node_lvs.payload
9190

    
9191
      delta = all_lvs.difference(node_lvs.keys())
9192
      if delta:
9193
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9194
                                   utils.CommaJoin(delta),
9195
                                   errors.ECODE_INVAL)
9196
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9197
      if online_lvs:
9198
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9199
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9200
                                   errors.ECODE_STATE)
9201
      # update the size of disk based on what is found
9202
      for dsk in self.disks:
9203
        dsk[constants.IDISK_SIZE] = \
9204
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9205
                                        dsk[constants.IDISK_ADOPT])][0]))
9206

    
9207
    elif self.op.disk_template == constants.DT_BLOCK:
9208
      # Normalize and de-duplicate device paths
9209
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9210
                       for disk in self.disks])
9211
      if len(all_disks) != len(self.disks):
9212
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9213
                                   errors.ECODE_INVAL)
9214
      baddisks = [d for d in all_disks
9215
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9216
      if baddisks:
9217
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9218
                                   " cannot be adopted" %
9219
                                   (", ".join(baddisks),
9220
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9221
                                   errors.ECODE_INVAL)
9222

    
9223
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9224
                                            list(all_disks))[pnode.name]
9225
      node_disks.Raise("Cannot get block device information from node %s" %
9226
                       pnode.name)
9227
      node_disks = node_disks.payload
9228
      delta = all_disks.difference(node_disks.keys())
9229
      if delta:
9230
        raise errors.OpPrereqError("Missing block device(s): %s" %
9231
                                   utils.CommaJoin(delta),
9232
                                   errors.ECODE_INVAL)
9233
      for dsk in self.disks:
9234
        dsk[constants.IDISK_SIZE] = \
9235
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9236

    
9237
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9238

    
9239
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9240
    # check OS parameters (remotely)
9241
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9242

    
9243
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9244

    
9245
    # memory check on primary node
9246
    if self.op.start:
9247
      _CheckNodeFreeMemory(self, self.pnode.name,
9248
                           "creating instance %s" % self.op.instance_name,
9249
                           self.be_full[constants.BE_MEMORY],
9250
                           self.op.hypervisor)
9251

    
9252
    self.dry_run_result = list(nodenames)
9253

    
9254
  def Exec(self, feedback_fn):
9255
    """Create and add the instance to the cluster.
9256

9257
    """
9258
    instance = self.op.instance_name
9259
    pnode_name = self.pnode.name
9260

    
9261
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9262
                self.owned_locks(locking.LEVEL_NODE)), \
9263
      "Node locks differ from node resource locks"
9264

    
9265
    ht_kind = self.op.hypervisor
9266
    if ht_kind in constants.HTS_REQ_PORT:
9267
      network_port = self.cfg.AllocatePort()
9268
    else:
9269
      network_port = None
9270

    
9271
    disks = _GenerateDiskTemplate(self,
9272
                                  self.op.disk_template,
9273
                                  instance, pnode_name,
9274
                                  self.secondaries,
9275
                                  self.disks,
9276
                                  self.instance_file_storage_dir,
9277
                                  self.op.file_driver,
9278
                                  0,
9279
                                  feedback_fn)
9280

    
9281
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9282
                            primary_node=pnode_name,
9283
                            nics=self.nics, disks=disks,
9284
                            disk_template=self.op.disk_template,
9285
                            admin_state=constants.ADMINST_DOWN,
9286
                            network_port=network_port,
9287
                            beparams=self.op.beparams,
9288
                            hvparams=self.op.hvparams,
9289
                            hypervisor=self.op.hypervisor,
9290
                            osparams=self.op.osparams,
9291
                            )
9292

    
9293
    if self.op.tags:
9294
      for tag in self.op.tags:
9295
        iobj.AddTag(tag)
9296

    
9297
    if self.adopt_disks:
9298
      if self.op.disk_template == constants.DT_PLAIN:
9299
        # rename LVs to the newly-generated names; we need to construct
9300
        # 'fake' LV disks with the old data, plus the new unique_id
9301
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9302
        rename_to = []
9303
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9304
          rename_to.append(t_dsk.logical_id)
9305
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9306
          self.cfg.SetDiskID(t_dsk, pnode_name)
9307
        result = self.rpc.call_blockdev_rename(pnode_name,
9308
                                               zip(tmp_disks, rename_to))
9309
        result.Raise("Failed to rename adoped LVs")
9310
    else:
9311
      feedback_fn("* creating instance disks...")
9312
      try:
9313
        _CreateDisks(self, iobj)
9314
      except errors.OpExecError:
9315
        self.LogWarning("Device creation failed, reverting...")
9316
        try:
9317
          _RemoveDisks(self, iobj)
9318
        finally:
9319
          self.cfg.ReleaseDRBDMinors(instance)
9320
          raise
9321

    
9322
    feedback_fn("adding instance %s to cluster config" % instance)
9323

    
9324
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9325

    
9326
    # Declare that we don't want to remove the instance lock anymore, as we've
9327
    # added the instance to the config
9328
    del self.remove_locks[locking.LEVEL_INSTANCE]
9329

    
9330
    if self.op.mode == constants.INSTANCE_IMPORT:
9331
      # Release unused nodes
9332
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9333
    else:
9334
      # Release all nodes
9335
      _ReleaseLocks(self, locking.LEVEL_NODE)
9336

    
9337
    disk_abort = False
9338
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9339
      feedback_fn("* wiping instance disks...")
9340
      try:
9341
        _WipeDisks(self, iobj)
9342
      except errors.OpExecError, err:
9343
        logging.exception("Wiping disks failed")
9344
        self.LogWarning("Wiping instance disks failed (%s)", err)
9345
        disk_abort = True
9346

    
9347
    if disk_abort:
9348
      # Something is already wrong with the disks, don't do anything else
9349
      pass
9350
    elif self.op.wait_for_sync:
9351
      disk_abort = not _WaitForSync(self, iobj)
9352
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9353
      # make sure the disks are not degraded (still sync-ing is ok)
9354
      feedback_fn("* checking mirrors status")
9355
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9356
    else:
9357
      disk_abort = False
9358

    
9359
    if disk_abort:
9360
      _RemoveDisks(self, iobj)
9361
      self.cfg.RemoveInstance(iobj.name)
9362
      # Make sure the instance lock gets removed
9363
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9364
      raise errors.OpExecError("There are some degraded disks for"
9365
                               " this instance")
9366

    
9367
    # Release all node resource locks
9368
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9369

    
9370
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9371
      if self.op.mode == constants.INSTANCE_CREATE:
9372
        if not self.op.no_install:
9373
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9374
                        not self.op.wait_for_sync)
9375
          if pause_sync:
9376
            feedback_fn("* pausing disk sync to install instance OS")
9377
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9378
                                                              iobj.disks, True)
9379
            for idx, success in enumerate(result.payload):
9380
              if not success:
9381
                logging.warn("pause-sync of instance %s for disk %d failed",
9382
                             instance, idx)
9383

    
9384
          feedback_fn("* running the instance OS create scripts...")
9385
          # FIXME: pass debug option from opcode to backend
9386
          os_add_result = \
9387
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9388
                                          self.op.debug_level)
9389
          if pause_sync:
9390
            feedback_fn("* resuming disk sync")
9391
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9392
                                                              iobj.disks, False)
9393
            for idx, success in enumerate(result.payload):
9394
              if not success:
9395
                logging.warn("resume-sync of instance %s for disk %d failed",
9396
                             instance, idx)
9397

    
9398
          os_add_result.Raise("Could not add os for instance %s"
9399
                              " on node %s" % (instance, pnode_name))
9400

    
9401
      elif self.op.mode == constants.INSTANCE_IMPORT:
9402
        feedback_fn("* running the instance OS import scripts...")
9403

    
9404
        transfers = []
9405

    
9406
        for idx, image in enumerate(self.src_images):
9407
          if not image:
9408
            continue
9409

    
9410
          # FIXME: pass debug option from opcode to backend
9411
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9412
                                             constants.IEIO_FILE, (image, ),
9413
                                             constants.IEIO_SCRIPT,
9414
                                             (iobj.disks[idx], idx),
9415
                                             None)
9416
          transfers.append(dt)
9417

    
9418
        import_result = \
9419
          masterd.instance.TransferInstanceData(self, feedback_fn,
9420
                                                self.op.src_node, pnode_name,
9421
                                                self.pnode.secondary_ip,
9422
                                                iobj, transfers)
9423
        if not compat.all(import_result):
9424
          self.LogWarning("Some disks for instance %s on node %s were not"
9425
                          " imported successfully" % (instance, pnode_name))
9426

    
9427
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9428
        feedback_fn("* preparing remote import...")
9429
        # The source cluster will stop the instance before attempting to make a
9430
        # connection. In some cases stopping an instance can take a long time,
9431
        # hence the shutdown timeout is added to the connection timeout.
9432
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9433
                           self.op.source_shutdown_timeout)
9434
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9435

    
9436
        assert iobj.primary_node == self.pnode.name
9437
        disk_results = \
9438
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9439
                                        self.source_x509_ca,
9440
                                        self._cds, timeouts)
9441
        if not compat.all(disk_results):
9442
          # TODO: Should the instance still be started, even if some disks
9443
          # failed to import (valid for local imports, too)?
9444
          self.LogWarning("Some disks for instance %s on node %s were not"
9445
                          " imported successfully" % (instance, pnode_name))
9446

    
9447
        # Run rename script on newly imported instance
9448
        assert iobj.name == instance
9449
        feedback_fn("Running rename script for %s" % instance)
9450
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9451
                                                   self.source_instance_name,
9452
                                                   self.op.debug_level)
9453
        if result.fail_msg:
9454
          self.LogWarning("Failed to run rename script for %s on node"
9455
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9456

    
9457
      else:
9458
        # also checked in the prereq part
9459
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9460
                                     % self.op.mode)
9461

    
9462
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
9463

    
9464
    if self.op.start:
9465
      iobj.admin_state = constants.ADMINST_UP
9466
      self.cfg.Update(iobj, feedback_fn)
9467
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9468
      feedback_fn("* starting instance...")
9469
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9470
                                            False)
9471
      result.Raise("Could not start instance")
9472

    
9473
    return list(iobj.all_nodes)
9474

    
9475

    
9476
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_state == constants.ADMINST_UP:
        state = constants.INSTST_ERRORDOWN
      elif instance.admin_state == constants.ADMINST_DOWN:
        state = constants.INSTST_ADMINDOWN
      else:
        state = constants.INSTST_ADMINOFFLINE
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


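# Editor's note (illustrative sketch, not part of the original module): the
# dict returned by _GetInstanceConsole() is simply console.ToDict(), i.e. the
# serialized console object produced by the hypervisor layer, meant to be
# rebuilt on the client side and turned into an actual console command.
# A rough, hypothetical consumer (class and attribute names here are
# assumptions, not verified against this file) could look like:
#
#   console = objects.InstanceConsole.FromDict(op_result)
#   if console.Validate():
#     utils.RunCmd(console.command)

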
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    assert locking.LEVEL_NODE not in self.needed_locks
    assert locking.LEVEL_NODE_RES not in self.needed_locks
    assert locking.LEVEL_NODEGROUP not in self.needed_locks

    assert self.op.iallocator is None or self.op.remote_node is None, \
      "Conflicting options"

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[locking.LEVEL_NODEGROUP] = []

    self.needed_locks[locking.LEVEL_NODE_RES] = []

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      # Lock all groups used by instance optimistically; this requires going
      # via the node before it's locked, requiring verification later on
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is not None:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        self.needed_locks[locking.LEVEL_NODE] = [node_name
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
      else:
        self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Reuse node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self.replacer.instance
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    # Verify if node group locks are still correct
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
    if owned_groups:
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)

    return LogicalUnit.CheckPrereq(self)


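# Editor's note (illustrative, not part of the original module): a typical
# secondary-change request handled by LUInstanceReplaceDisks would be built
# from an opcode along these lines (field names taken from the self.op.*
# attributes used above; the exact constructor signature is not shown in this
# file, and "hail" is just an example iallocator name):
#
#   op = opcodes.OpInstanceReplaceDisks(instance_name="inst1.example.com",
#                                       mode=constants.REPLACE_DISK_CHG,
#                                       remote_node=None,
#                                       iallocator="hail",
#                                       disks=[],
#                                       early_release=False)
#
# Exactly one of remote_node/iallocator may be given for REPLACE_DISK_CHG;
# TLReplaceDisks.CheckArguments below enforces this.

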
class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=list(relocate_from))

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

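  # Editor's summary (illustrative, derived from CheckArguments above):
  #
  #   mode                        remote_node  iallocator  accepted?
  #   REPLACE_DISK_CHG            None         None        no (need one)
  #   REPLACE_DISK_CHG            given        given       no (not both)
  #   REPLACE_DISK_CHG            exactly one of the two   yes
  #   REPLACE_DISK_PRI/SEC/AUTO   None         None        yes
  #   REPLACE_DISK_PRI/SEC/AUTO   either one given         no
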
  def _FindFaultyDisks(self, node_name):
    """Wrapper for L{_FindFaultyInstanceDisks}.

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def _CheckDisksActivated(self, instance):
    """Checks if the instance disks are activated.

    @param instance: The instance to check disks
    @return: True if they are activated, False otherwise

    """
    nodes = instance.all_nodes

    for idx, dev in enumerate(instance.disks):
      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        if result.offline:
          continue
        elif result.fail_msg or not result.payload:
          return False

    return True

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node and node resource locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if __debug__:
      # Verify owned locks before starting operation
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
      assert set(owned_nodes) == set(self.node_secondary_ip), \
          ("Incorrect node locks, owning %s, expected %s" %
           (owned_nodes, self.node_secondary_ip.keys()))
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
              self.lu.owned_locks(locking.LEVEL_NODE_RES))

      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
      assert list(owned_instances) == [self.instance_name], \
          "Instance '%s' not locked" % self.instance_name

      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
          "Should not own any node group lock at this point"

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      result = fn(feedback_fn)
    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

    assert not self.lu.owned_locks(locking.LEVEL_NODE)

    if __debug__:
      # Verify owned locks
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
      nodes = frozenset(self.node_secondary_ip)
      assert ((self.early_release and not owned_nodes) or
              (not self.early_release and not (set(owned_nodes) - nodes))), \
        ("Not owning the correct locks, early_release=%s, owned=%r,"
         " nodes=%r" % (self.early_release, owned_nodes, nodes))

    return result

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

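  # Editor's note (illustrative, derived from _CreateNewStorage above): the
  # iv_names mapping threaded through the helpers has the shape
  #
  #   iv_names = {
  #     "disk/0": (drbd_dev,
  #                [old_data_lv, old_meta_lv],
  #                [new_data_lv, new_meta_lv]),
  #     ...
  #   }
  #
  # where the new LV names start from the ".disk<idx>_data"/".disk<idx>_meta"
  # templates and are made unique by _GenerateUniqueNames.
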
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Intermediate steps of in memory modifications
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # We need to modify old_lvs so that removal later removes the
      # right LVs, not the newly added ones; note that old_lvs is a
      # copy here
      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # Release all node locks while waiting for sync
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)

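  # Editor's note (illustrative, not part of the original module): for a
  # single replaced disk the detach/rename/attach dance above looks roughly
  # like this on the target node (volume group and LV names are made up):
  #
  #   xenvg/old_data                   -> xenvg/old_data_replaced-1300000000
  #   xenvg/uuid.disk0_data (new LV)   -> xenvg/old_data
  #
  # i.e. the new LV ends up under the old name, so the DRBD device can be
  # re-attached without touching the instance configuration, while the
  # renamed originals are deleted later in the "remove old storage" step.
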
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    pnode = self.instance.primary_node

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
                                               self.instance.disks)[pnode]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # Release all node locks (the configuration has been updated)
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    cstep = itertools.count(5)

    if self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)
      # TODO: Check if releasing locks early still makes sense
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
    else:
      # Release all resource locks except those used by the instance
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
                    keep=self.node_secondary_ip.keys())

    # TODO: Can the instance lock be downgraded here? Take the optional disk
    # shutdown in the caller into consideration.

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


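# Editor's note (illustrative, derived from _ExecDrbd8Secondary above): a
# DRBD8 disk's logical_id is the 6-tuple
#
#   (node_a, node_b, port, minor_a, minor_b, secret)
#
# The secondary replacement first builds new_alone_id with port=None, so the
# device comes up on the new node without any networking, and only switches
# the configuration to new_net_id (the same tuple with the real port) once
# the primary has been disconnected from the old secondary.

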
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if inst.admin_state != constants.ADMINST_UP:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUNodeEvacuate(NoHooksLU):
  """Evacuates instances off a list of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      assert self.op.remote_node

      if self.op.remote_node == self.op.node_name:
        raise errors.OpPrereqError("Can not use evacuated node as a new"
                                   " secondary node", errors.ECODE_INVAL)

      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
        raise errors.OpPrereqError("Without the use of an iallocator only"
                                   " secondary instances can be evacuated",
                                   errors.ECODE_INVAL)

    # Declare locks
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    if self.op.remote_node is None:
      # Iallocator will choose any node(s) in the same group
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
    else:
      group_nodes = frozenset([self.op.remote_node])

    # Determine nodes to be locked
    self.lock_nodes = set([self.op.node_name]) | group_nodes

  def _DetermineInstances(self):
    """Builds list of instances to operate on.

    """
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES

    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
      # Primary instances only
      inst_fn = _GetNodePrimaryInstances
      assert self.op.remote_node is None, \
        "Evacuating primary instances requires iallocator"
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
      # Secondary instances only
      inst_fn = _GetNodeSecondaryInstances
    else:
      # All instances
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
      inst_fn = _GetNodeInstances

    return inst_fn(self.cfg, self.op.node_name)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        set(i.name for i in self._DetermineInstances())

    elif level == locking.LEVEL_NODEGROUP:
      # Lock node groups optimistically, needs verification once nodes have
      # been acquired
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)

    elif level == locking.LEVEL_NODE:
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes

  def CheckPrereq(self):
    # Verify locks
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)

    assert owned_nodes == self.lock_nodes

    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
    if owned_groups != wanted_groups:
      raise errors.OpExecError("Node groups changed since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(wanted_groups),
                                utils.CommaJoin(owned_groups)))

    # Determine affected instances
    self.instances = self._DetermineInstances()
    self.instance_names = [i.name for i in self.instances]

    if set(self.instance_names) != owned_instances:
      raise errors.OpExecError("Instances on node '%s' changed since locks"
                               " were acquired, current instances are '%s',"
                               " used to be '%s'" %
                               (self.op.node_name,
                                utils.CommaJoin(self.instance_names),
                                utils.CommaJoin(owned_instances)))

    if self.instance_names:
      self.LogInfo("Evacuating instances from node '%s': %s",
                   self.op.node_name,
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
    else:
      self.LogInfo("No instances to evacuate from node '%s'",
                   self.op.node_name)

    if self.op.remote_node is not None:
      for i in self.instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)

    if not self.instance_names:
      # No instances to evacuate
      jobs = []

    elif self.op.iallocator is not None:
      # TODO: Implement relocation to other group
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
                       evac_mode=self.op.mode,
                       instances=list(self.instance_names))

      ial.Run(self.op.iallocator)

      if not ial.success:
        raise errors.OpPrereqError("Can't compute node evacuation using"
                                   " iallocator '%s': %s" %
                                   (self.op.iallocator, ial.info),
                                   errors.ECODE_NORES)

      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)

    elif self.op.remote_node is not None:
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
      jobs = [
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
                                        remote_node=self.op.remote_node,
                                        disks=[],
                                        mode=constants.REPLACE_DISK_CHG,
                                        early_release=self.op.early_release)]
        for instance_name in self.instance_names
        ]

    else:
      raise errors.ProgrammerError("No iallocator or remote node")

    return ResultWithJobs(jobs)


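# Editor's note (illustrative, derived from LUNodeEvacuate.Exec above): the
# value handed to ResultWithJobs is a list of jobs, each job being a list of
# opcodes; in the remote_node case it degenerates to one single-opcode job
# per evacuated instance, conceptually:
#
#   jobs = [
#     [OpInstanceReplaceDisks(instance_name="inst1", ...)],
#     [OpInstanceReplaceDisks(instance_name="inst2", ...)],
#   ]

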
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op


def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group


def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
  (moved, failed, jobs) = alloc_result

  if failed:
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
                                 for (name, reason) in failed)
    lu.LogWarning("Unable to evacuate instances %s", failreason)
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)

  if moved:
    lu.LogInfo("Instances to be moved: %s",
               utils.CommaJoin("%s (to %s)" %
                               (name, _NodeEvacDest(use_nodes, group, nodes))
                               for (name, group, nodes) in moved))

  return [map(compat.partial(_SetOpEarlyRelease, early_release),
              map(opcodes.OpCode.LoadOpCode, ops))
          for ops in jobs]


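# Editor's note (illustrative, derived from _LoadNodeEvacResult above): the
# iallocator result it unpacks is a 3-tuple, schematically
#
#   (moved, failed, jobs) = (
#     [("inst1", "group1", ["nodeA", "nodeB"])],   # (name, group, nodes)
#     [("inst2", "disk too large")],               # (name, failure reason)
#     [[<serialized opcode>, ...], ...],           # one list per job
#   )
#
# Each serialized opcode is re-hydrated with opcodes.OpCode.LoadOpCode and
# gets the caller's early_release flag applied where the opcode supports it.

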
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if instance.disk_template not in (constants.DT_FILE,
                                      constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      _CheckNodesFreeDiskPerVG(self, nodenames,
                               self.disk.ComputeGrowth(self.op.amount))

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed to node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
10839
  """Query runtime instance data.
10840

10841
  """
10842
  REQ_BGL = False
10843

    
10844
  def ExpandNames(self):
10845
    self.needed_locks = {}
10846

    
10847
    # Use locking if requested or when non-static information is wanted
10848
    if not (self.op.static or self.op.use_locking):
10849
      self.LogWarning("Non-static data requested, locks need to be acquired")
10850
      self.op.use_locking = True
10851

    
10852
    if self.op.instances or not self.op.use_locking:
10853
      # Expand instance names right here
10854
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10855
    else:
10856
      # Will use acquired locks
10857
      self.wanted_names = None
10858

    
10859
    if self.op.use_locking:
10860
      self.share_locks = _ShareAll()
10861

    
10862
      if self.wanted_names is None:
10863
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10864
      else:
10865
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10866

    
10867
      self.needed_locks[locking.LEVEL_NODE] = []
10868
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10869

    
10870
  def DeclareLocks(self, level):
10871
    if self.op.use_locking and level == locking.LEVEL_NODE:
10872
      self._LockInstancesNodes()
10873

    
10874
  def CheckPrereq(self):
10875
    """Check prerequisites.
10876

10877
    This only checks the optional instance list against the existing names.
10878

10879
    """
10880
    if self.wanted_names is None:
10881
      assert self.op.use_locking, "Locking was not used"
10882
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10883

    
10884
    self.wanted_instances = \
10885
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10886

    
10887
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10888
    """Returns the status of a block device
10889

10890
    """
10891
    if self.op.static or not node:
10892
      return None
10893

    
10894
    self.cfg.SetDiskID(dev, node)
10895

    
10896
    result = self.rpc.call_blockdev_find(node, dev)
10897
    if result.offline:
10898
      return None
10899

    
10900
    result.Raise("Can't compute disk status for %s" % instance_name)
10901

    
10902
    status = result.payload
10903
    if status is None:
10904
      return None
10905

    
10906
    return (status.dev_path, status.major, status.minor,
10907
            status.sync_percent, status.estimated_time,
10908
            status.is_degraded, status.ldisk_status)
10909

    
10910
  def _ComputeDiskStatus(self, instance, snode, dev):
10911
    """Compute block device status.
10912

10913
    """
10914
    if dev.dev_type in constants.LDS_DRBD:
10915
      # we change the snode then (otherwise we use the one passed in)
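      # (for DRBD8 the first two entries of logical_id are the two node names)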
10916
      if dev.logical_id[0] == instance.primary_node:
10917
        snode = dev.logical_id[1]
10918
      else:
10919
        snode = dev.logical_id[0]
10920

    
10921
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10922
                                              instance.name, dev)
10923
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10924

    
10925
    if dev.children:
10926
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10927
                                        instance, snode),
10928
                         dev.children)
10929
    else:
10930
      dev_children = []
10931

    
10932
    return {
10933
      "iv_name": dev.iv_name,
10934
      "dev_type": dev.dev_type,
10935
      "logical_id": dev.logical_id,
10936
      "physical_id": dev.physical_id,
10937
      "pstatus": dev_pstatus,
10938
      "sstatus": dev_sstatus,
10939
      "children": dev_children,
10940
      "mode": dev.mode,
10941
      "size": dev.size,
10942
      }
10943

    
10944
  def Exec(self, feedback_fn):
10945
    """Gather and return data"""
10946
    result = {}
10947

    
10948
    cluster = self.cfg.GetClusterInfo()
10949

    
10950
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10951
                                          for i in self.wanted_instances)
10952
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10953
      if self.op.static or pnode.offline:
10954
        remote_state = None
10955
        if pnode.offline:
10956
          self.LogWarning("Primary node %s is marked offline, returning static"
10957
                          " information only for instance %s" %
10958
                          (pnode.name, instance.name))
10959
      else:
10960
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10961
                                                  instance.name,
10962
                                                  instance.hypervisor)
10963
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10964
        remote_info = remote_info.payload
10965
        if remote_info and "state" in remote_info:
10966
          remote_state = "up"
10967
        else:
10968
          if instance.admin_state == constants.ADMINST_UP:
10969
            remote_state = "down"
10970
          else:
10971
            remote_state = instance.admin_state
10972

    
10973
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10974
                  instance.disks)
10975

    
10976
      result[instance.name] = {
10977
        "name": instance.name,
10978
        "config_state": instance.admin_state,
10979
        "run_state": remote_state,
10980
        "pnode": instance.primary_node,
10981
        "snodes": instance.secondary_nodes,
10982
        "os": instance.os,
10983
        # this happens to be the same format used for hooks
10984
        "nics": _NICListToTuple(self, instance.nics),
10985
        "disk_template": instance.disk_template,
10986
        "disks": disks,
10987
        "hypervisor": instance.hypervisor,
10988
        "network_port": instance.network_port,
10989
        "hv_instance": instance.hvparams,
10990
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10991
        "be_instance": instance.beparams,
10992
        "be_actual": cluster.FillBE(instance),
10993
        "os_instance": instance.osparams,
10994
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10995
        "serial_no": instance.serial_no,
10996
        "mtime": instance.mtime,
10997
        "ctime": instance.ctime,
10998
        "uuid": instance.uuid,
10999
        }
11000

    
11001
    return result
11002

    
11003

    
11004
class LUInstanceSetParams(LogicalUnit):
11005
  """Modifies an instances's parameters.
11006

11007
  """
11008
  HPATH = "instance-modify"
11009
  HTYPE = constants.HTYPE_INSTANCE
11010
  REQ_BGL = False
11011

    
11012
  def CheckArguments(self):
11013
    if not (self.op.nics or self.op.disks or self.op.disk_template or
11014
            self.op.hvparams or self.op.beparams or self.op.os_name or
11015
            self.op.online_inst or self.op.offline_inst):
11016
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
11017

    
11018
    if self.op.hvparams:
11019
      _CheckGlobalHvParams(self.op.hvparams)
11020

    
11021
    # Disk validation
11022
    disk_addremove = 0
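    # Each entry is (disk_op, params): disk_op is DDM_ADD, DDM_REMOVE or the
    # index of an existing disk to modify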
11023
    for disk_op, disk_dict in self.op.disks:
11024
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
11025
      if disk_op == constants.DDM_REMOVE:
11026
        disk_addremove += 1
11027
        continue
11028
      elif disk_op == constants.DDM_ADD:
11029
        disk_addremove += 1
11030
      else:
11031
        if not isinstance(disk_op, int):
11032
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
11033
        if not isinstance(disk_dict, dict):
11034
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
11035
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11036

    
11037
      if disk_op == constants.DDM_ADD:
11038
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
11039
        if mode not in constants.DISK_ACCESS_SET:
11040
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
11041
                                     errors.ECODE_INVAL)
11042
        size = disk_dict.get(constants.IDISK_SIZE, None)
11043
        if size is None:
11044
          raise errors.OpPrereqError("Required disk parameter size missing",
11045
                                     errors.ECODE_INVAL)
11046
        try:
11047
          size = int(size)
11048
        except (TypeError, ValueError), err:
11049
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
11050
                                     str(err), errors.ECODE_INVAL)
11051
        disk_dict[constants.IDISK_SIZE] = size
11052
      else:
11053
        # modification of disk
11054
        if constants.IDISK_SIZE in disk_dict:
11055
          raise errors.OpPrereqError("Disk size change not possible, use"
11056
                                     " grow-disk", errors.ECODE_INVAL)
11057

    
11058
    if disk_addremove > 1:
11059
      raise errors.OpPrereqError("Only one disk add or remove operation"
11060
                                 " supported at a time", errors.ECODE_INVAL)
11061

    
11062
    if self.op.disks and self.op.disk_template is not None:
11063
      raise errors.OpPrereqError("Disk template conversion and other disk"
11064
                                 " changes not supported at the same time",
11065
                                 errors.ECODE_INVAL)
11066

    
11067
    if (self.op.disk_template and
11068
        self.op.disk_template in constants.DTS_INT_MIRROR and
11069
        self.op.remote_node is None):
11070
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
11071
                                 " one requires specifying a secondary node",
11072
                                 errors.ECODE_INVAL)
11073

    
11074
    # NIC validation
11075
    nic_addremove = 0
11076
    for nic_op, nic_dict in self.op.nics:
11077
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
11078
      if nic_op == constants.DDM_REMOVE:
11079
        nic_addremove += 1
11080
        continue
11081
      elif nic_op == constants.DDM_ADD:
11082
        nic_addremove += 1
11083
      else:
11084
        if not isinstance(nic_op, int):
11085
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
11086
        if not isinstance(nic_dict, dict):
11087
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
11088
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
11089

    
11090
      # nic_dict should be a dict
11091
      nic_ip = nic_dict.get(constants.INIC_IP, None)
11092
      if nic_ip is not None:
11093
        if nic_ip.lower() == constants.VALUE_NONE:
11094
          nic_dict[constants.INIC_IP] = None
11095
        else:
11096
          if not netutils.IPAddress.IsValid(nic_ip):
11097
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
11098
                                       errors.ECODE_INVAL)
11099

    
11100
      nic_bridge = nic_dict.get("bridge", None)
11101
      nic_link = nic_dict.get(constants.INIC_LINK, None)
11102
      if nic_bridge and nic_link:
11103
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
11104
                                   " at the same time", errors.ECODE_INVAL)
11105
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
11106
        nic_dict["bridge"] = None
11107
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
11108
        nic_dict[constants.INIC_LINK] = None
11109

    
11110
      if nic_op == constants.DDM_ADD:
11111
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
11112
        if nic_mac is None:
11113
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11114

    
11115
      if constants.INIC_MAC in nic_dict:
11116
        nic_mac = nic_dict[constants.INIC_MAC]
11117
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11118
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11119

    
11120
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11121
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11122
                                     " modifying an existing nic",
11123
                                     errors.ECODE_INVAL)
11124

    
11125
    if nic_addremove > 1:
11126
      raise errors.OpPrereqError("Only one NIC add or remove operation"
11127
                                 " supported at a time", errors.ECODE_INVAL)
11128

    
11129
  def ExpandNames(self):
11130
    self._ExpandAndLockInstance()
11131
    # Can't even acquire node locks in shared mode as upcoming changes in
11132
    # Ganeti 2.6 will start to modify the node object on disk conversion
11133
    self.needed_locks[locking.LEVEL_NODE] = []
11134
    self.needed_locks[locking.LEVEL_NODE_RES] = []
11135
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11136

    
11137
  def DeclareLocks(self, level):
11138
    if level == locking.LEVEL_NODE:
11139
      self._LockInstancesNodes()
11140
      if self.op.disk_template and self.op.remote_node:
11141
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11142
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11143
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11144
      # Copy node locks
11145
      self.needed_locks[locking.LEVEL_NODE_RES] = \
11146
        self.needed_locks[locking.LEVEL_NODE][:]
11147

    
11148
  def BuildHooksEnv(self):
11149
    """Build hooks env.
11150

11151
    This runs on the master, primary and secondaries.
11152

11153
    """
11154
    args = dict()
11155
    if constants.BE_MINMEM in self.be_new:
11156
      args["minmem"] = self.be_new[constants.BE_MINMEM]
11157
    if constants.BE_MAXMEM in self.be_new:
11158
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
11159
    if constants.BE_VCPUS in self.be_new:
11160
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
11161
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11162
    # information at all.
11163
    if self.op.nics:
11164
      args["nics"] = []
11165
      nic_override = dict(self.op.nics)
11166
      for idx, nic in enumerate(self.instance.nics):
11167
        if idx in nic_override:
11168
          this_nic_override = nic_override[idx]
11169
        else:
11170
          this_nic_override = {}
11171
        if constants.INIC_IP in this_nic_override:
11172
          ip = this_nic_override[constants.INIC_IP]
11173
        else:
11174
          ip = nic.ip
11175
        if constants.INIC_MAC in this_nic_override:
11176
          mac = this_nic_override[constants.INIC_MAC]
11177
        else:
11178
          mac = nic.mac
11179
        if idx in self.nic_pnew:
11180
          nicparams = self.nic_pnew[idx]
11181
        else:
11182
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11183
        mode = nicparams[constants.NIC_MODE]
11184
        link = nicparams[constants.NIC_LINK]
11185
        args["nics"].append((ip, mac, mode, link))
11186
      if constants.DDM_ADD in nic_override:
11187
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11188
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11189
        nicparams = self.nic_pnew[constants.DDM_ADD]
11190
        mode = nicparams[constants.NIC_MODE]
11191
        link = nicparams[constants.NIC_LINK]
11192
        args["nics"].append((ip, mac, mode, link))
11193
      elif constants.DDM_REMOVE in nic_override:
11194
        del args["nics"][-1]
11195

    
11196
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11197
    if self.op.disk_template:
11198
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11199

    
11200
    return env
11201

    
11202
  def BuildHooksNodes(self):
11203
    """Build hooks nodes.
11204

11205
    """
11206
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11207
    return (nl, nl)
11208

    
11209
  def CheckPrereq(self):
11210
    """Check prerequisites.
11211

11212
    This checks the requested parameter changes against the instance's
    current configuration and the state of its nodes.
11213

11214
    """
11215
    # checking the new params on the primary/secondary nodes
11216

    
11217
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11218
    cluster = self.cluster = self.cfg.GetClusterInfo()
11219
    assert self.instance is not None, \
11220
      "Cannot retrieve locked instance %s" % self.op.instance_name
11221
    pnode = instance.primary_node
11222
    nodelist = list(instance.all_nodes)
11223

    
11224
    # OS change
11225
    if self.op.os_name and not self.op.force:
11226
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11227
                      self.op.force_variant)
11228
      instance_os = self.op.os_name
11229
    else:
11230
      instance_os = instance.os
11231

    
11232
    if self.op.disk_template:
11233
      if instance.disk_template == self.op.disk_template:
11234
        raise errors.OpPrereqError("Instance already has disk template %s" %
11235
                                   instance.disk_template, errors.ECODE_INVAL)
11236

    
11237
      if (instance.disk_template,
11238
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11239
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11240
                                   " %s to %s" % (instance.disk_template,
11241
                                                  self.op.disk_template),
11242
                                   errors.ECODE_INVAL)
11243
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11244
                          msg="cannot change disk template")
11245
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11246
        if self.op.remote_node == pnode:
11247
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11248
                                     " as the primary node of the instance" %
11249
                                     self.op.remote_node, errors.ECODE_STATE)
11250
        _CheckNodeOnline(self, self.op.remote_node)
11251
        _CheckNodeNotDrained(self, self.op.remote_node)
11252
        # FIXME: here we assume that the old instance type is DT_PLAIN
11253
        assert instance.disk_template == constants.DT_PLAIN
11254
        disks = [{constants.IDISK_SIZE: d.size,
11255
                  constants.IDISK_VG: d.logical_id[0]}
11256
                 for d in instance.disks]
11257
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11258
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11259

    
11260
    # hvparams processing
11261
    if self.op.hvparams:
11262
      hv_type = instance.hypervisor
11263
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11264
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11265
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11266

    
11267
      # local check
11268
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11269
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11270
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11271
      self.hv_inst = i_hvdict # the new dict (without defaults)
11272
    else:
11273
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11274
                                              instance.hvparams)
11275
      self.hv_new = self.hv_inst = {}
11276

    
11277
    # beparams processing
11278
    if self.op.beparams:
11279
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11280
                                   use_none=True)
11281
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11282
      be_new = cluster.SimpleFillBE(i_bedict)
11283
      self.be_proposed = self.be_new = be_new # the new actual values
11284
      self.be_inst = i_bedict # the new dict (without defaults)
11285
    else:
11286
      self.be_new = self.be_inst = {}
11287
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11288
    be_old = cluster.FillBE(instance)
11289

    
11290
    # CPU param validation -- checking every time a parameter is
11291
    # changed to cover all cases where either CPU mask or vcpus have
11292
    # changed
11293
    if (constants.BE_VCPUS in self.be_proposed and
11294
        constants.HV_CPU_MASK in self.hv_proposed):
11295
      cpu_list = \
11296
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11297
      # Verify mask is consistent with number of vCPUs. Can skip this
11298
      # test if only 1 entry in the CPU mask, which means same mask
11299
      # is applied to all vCPUs.
11300
      if (len(cpu_list) > 1 and
11301
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11302
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11303
                                   " CPU mask [%s]" %
11304
                                   (self.be_proposed[constants.BE_VCPUS],
11305
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11306
                                   errors.ECODE_INVAL)
11307

    
11308
      # Only perform this test if a new CPU mask is given
11309
      if constants.HV_CPU_MASK in self.hv_new:
11310
        # Calculate the largest CPU number requested
11311
        max_requested_cpu = max(map(max, cpu_list))
11312
        # Check that all of the instance's nodes have enough physical CPUs to
11313
        # satisfy the requested CPU mask
11314
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11315
                                max_requested_cpu + 1, instance.hypervisor)
11316

    
11317
    # osparams processing
11318
    if self.op.osparams:
11319
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11320
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11321
      self.os_inst = i_osdict # the new dict (without defaults)
11322
    else:
11323
      self.os_inst = {}
11324

    
11325
    self.warn = []
11326

    
11327
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11328
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11329
      mem_check_list = [pnode]
11330
      if be_new[constants.BE_AUTO_BALANCE]:
11331
        # either we changed auto_balance to yes or it was from before
11332
        mem_check_list.extend(instance.secondary_nodes)
11333
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11334
                                                  instance.hypervisor)
11335
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11336
                                         instance.hypervisor)
11337
      pninfo = nodeinfo[pnode]
11338
      msg = pninfo.fail_msg
11339
      if msg:
11340
        # Assume the primary node is unreachable and go ahead
11341
        self.warn.append("Can't get info from primary node %s: %s" %
11342
                         (pnode, msg))
11343
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11344
        self.warn.append("Node data from primary node %s doesn't contain"
11345
                         " free memory information" % pnode)
11346
      elif instance_info.fail_msg:
11347
        self.warn.append("Can't get instance runtime information: %s" %
11348
                        instance_info.fail_msg)
11349
      else:
11350
        if instance_info.payload:
11351
          current_mem = int(instance_info.payload["memory"])
11352
        else:
11353
          # Assume instance not running
11354
          # (there is a slight race condition here, but it's not very probable,
11355
          # and we have no other way to check)
11356
          current_mem = 0
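        # Memory still missing on the primary node after accounting for what
        # the instance currently uses and what the node has free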
11357
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11358
                    pninfo.payload["memory_free"])
11359
        if miss_mem > 0:
11360
          raise errors.OpPrereqError("This change will prevent the instance"
11361
                                     " from starting, due to %d MB of memory"
11362
                                     " missing on its primary node" % miss_mem,
11363
                                     errors.ECODE_NORES)
11364

    
11365
      if be_new[constants.BE_AUTO_BALANCE]:
11366
        for node, nres in nodeinfo.items():
11367
          if node not in instance.secondary_nodes:
11368
            continue
11369
          nres.Raise("Can't get info from secondary node %s" % node,
11370
                     prereq=True, ecode=errors.ECODE_STATE)
11371
          if not isinstance(nres.payload.get("memory_free", None), int):
11372
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11373
                                       " memory information" % node,
11374
                                       errors.ECODE_STATE)
11375
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11376
            raise errors.OpPrereqError("This change will prevent the instance"
11377
                                       " from failover to its secondary node"
11378
                                       " %s, due to not enough memory" % node,
11379
                                       errors.ECODE_STATE)
11380

    
11381
    # NIC processing
11382
    self.nic_pnew = {}
11383
    self.nic_pinst = {}
11384
    for nic_op, nic_dict in self.op.nics:
11385
      if nic_op == constants.DDM_REMOVE:
11386
        if not instance.nics:
11387
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11388
                                     errors.ECODE_INVAL)
11389
        continue
11390
      if nic_op != constants.DDM_ADD:
11391
        # an existing nic
11392
        if not instance.nics:
11393
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11394
                                     " no NICs" % nic_op,
11395
                                     errors.ECODE_INVAL)
11396
        if nic_op < 0 or nic_op >= len(instance.nics):
11397
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11398
                                     " are 0 to %d" %
11399
                                     (nic_op, len(instance.nics) - 1),
11400
                                     errors.ECODE_INVAL)
11401
        old_nic_params = instance.nics[nic_op].nicparams
11402
        old_nic_ip = instance.nics[nic_op].ip
11403
      else:
11404
        old_nic_params = {}
11405
        old_nic_ip = None
11406

    
11407
      update_params_dict = dict([(key, nic_dict[key])
11408
                                 for key in constants.NICS_PARAMETERS
11409
                                 if key in nic_dict])
11410

    
11411
      if "bridge" in nic_dict:
11412
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11413

    
11414
      new_nic_params = _GetUpdatedParams(old_nic_params,
11415
                                         update_params_dict)
11416
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11417
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11418
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11419
      self.nic_pinst[nic_op] = new_nic_params
11420
      self.nic_pnew[nic_op] = new_filled_nic_params
11421
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11422

    
11423
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11424
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11425
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11426
        if msg:
11427
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11428
          if self.op.force:
11429
            self.warn.append(msg)
11430
          else:
11431
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11432
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11433
        if constants.INIC_IP in nic_dict:
11434
          nic_ip = nic_dict[constants.INIC_IP]
11435
        else:
11436
          nic_ip = old_nic_ip
11437
        if nic_ip is None:
11438
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11439
                                     " on a routed nic", errors.ECODE_INVAL)
11440
      if constants.INIC_MAC in nic_dict:
11441
        nic_mac = nic_dict[constants.INIC_MAC]
11442
        if nic_mac is None:
11443
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11444
                                     errors.ECODE_INVAL)
11445
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11446
          # otherwise generate the mac
11447
          nic_dict[constants.INIC_MAC] = \
11448
            self.cfg.GenerateMAC(self.proc.GetECId())
11449
        else:
11450
          # or validate/reserve the current one
11451
          try:
11452
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11453
          except errors.ReservationError:
11454
            raise errors.OpPrereqError("MAC address %s already in use"
11455
                                       " in cluster" % nic_mac,
11456
                                       errors.ECODE_NOTUNIQUE)
11457

    
11458
    # DISK processing
11459
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11460
      raise errors.OpPrereqError("Disk operations not supported for"
11461
                                 " diskless instances",
11462
                                 errors.ECODE_INVAL)
11463
    for disk_op, _ in self.op.disks:
11464
      if disk_op == constants.DDM_REMOVE:
11465
        if len(instance.disks) == 1:
11466
          raise errors.OpPrereqError("Cannot remove the last disk of"
11467
                                     " an instance", errors.ECODE_INVAL)
11468
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
11469
                            msg="cannot remove disks")
11470

    
11471
      if (disk_op == constants.DDM_ADD and
11472
          len(instance.disks) >= constants.MAX_DISKS):
11473
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11474
                                   " add more" % constants.MAX_DISKS,
11475
                                   errors.ECODE_STATE)
11476
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11477
        # an existing disk
11478
        if disk_op < 0 or disk_op >= len(instance.disks):
11479
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11480
                                     " are 0 to %d" %
11481
                                     (disk_op, len(instance.disks)),
11482
                                     errors.ECODE_INVAL)
11483

    
11484
    # disabling the instance
11485
    if self.op.offline_inst:
11486
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
11487
                          msg="cannot change instance state to offline")
11488

    
11489
    # enabling the instance
11490
    if self.op.online_inst:
11491
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
11492
                          msg="cannot make instance go online")
11493

    
11494
  def _ConvertPlainToDrbd(self, feedback_fn):
11495
    """Converts an instance from plain to drbd.
11496

11497
    """
11498
    feedback_fn("Converting template to drbd")
11499
    instance = self.instance
11500
    pnode = instance.primary_node
11501
    snode = self.op.remote_node
11502

    
11503
    assert instance.disk_template == constants.DT_PLAIN
11504

    
11505
    # create a fake disk info for _GenerateDiskTemplate
11506
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11507
                  constants.IDISK_VG: d.logical_id[0]}
11508
                 for d in instance.disks]
11509
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11510
                                      instance.name, pnode, [snode],
11511
                                      disk_info, None, None, 0, feedback_fn)
11512
    info = _GetInstanceInfoText(instance)
11513
    feedback_fn("Creating aditional volumes...")
11514
    # first, create the missing data and meta devices
11515
    for disk in new_disks:
11516
      # unfortunately this is... not too nice
11517
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11518
                            info, True)
11519
      for child in disk.children:
11520
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11521
    # at this stage, all new LVs have been created, we can rename the
11522
    # old ones
11523
    feedback_fn("Renaming original volumes...")
11524
    rename_list = [(o, n.children[0].logical_id)
11525
                   for (o, n) in zip(instance.disks, new_disks)]
11526
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11527
    result.Raise("Failed to rename original LVs")
11528

    
11529
    feedback_fn("Initializing DRBD devices...")
11530
    # all child devices are in place, we can now create the DRBD devices
11531
    for disk in new_disks:
11532
      for node in [pnode, snode]:
11533
        f_create = node == pnode
11534
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11535

    
11536
    # at this point, the instance has been modified
11537
    instance.disk_template = constants.DT_DRBD8
11538
    instance.disks = new_disks
11539
    self.cfg.Update(instance, feedback_fn)
11540

    
11541
    # Release node locks while waiting for sync
11542
    _ReleaseLocks(self, locking.LEVEL_NODE)
11543

    
11544
    # disks are created, waiting for sync
11545
    disk_abort = not _WaitForSync(self, instance,
11546
                                  oneshot=not self.op.wait_for_sync)
11547
    if disk_abort:
11548
      raise errors.OpExecError("There are some degraded disks for"
11549
                               " this instance, please cleanup manually")
11550

    
11551
    # Node resource locks will be released by caller
11552

    
11553
  def _ConvertDrbdToPlain(self, feedback_fn):
11554
    """Converts an instance from drbd to plain.
11555

11556
    """
11557
    instance = self.instance
11558

    
11559
    assert len(instance.secondary_nodes) == 1
11560
    assert instance.disk_template == constants.DT_DRBD8
11561

    
11562
    pnode = instance.primary_node
11563
    snode = instance.secondary_nodes[0]
11564
    feedback_fn("Converting template to plain")
11565

    
11566
    old_disks = instance.disks
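    # Each DRBD8 disk has its data LV as the first child and its metadata LV
    # as the second; only the data LVs are kept as plain disks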
11567
    new_disks = [d.children[0] for d in old_disks]
11568

    
11569
    # copy over size and mode
11570
    for parent, child in zip(old_disks, new_disks):
11571
      child.size = parent.size
11572
      child.mode = parent.mode
11573

    
11574
    # update instance structure
11575
    instance.disks = new_disks
11576
    instance.disk_template = constants.DT_PLAIN
11577
    self.cfg.Update(instance, feedback_fn)
11578

    
11579
    # Release locks in case removing disks takes a while
11580
    _ReleaseLocks(self, locking.LEVEL_NODE)
11581

    
11582
    feedback_fn("Removing volumes on the secondary node...")
11583
    for disk in old_disks:
11584
      self.cfg.SetDiskID(disk, snode)
11585
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11586
      if msg:
11587
        self.LogWarning("Could not remove block device %s on node %s,"
11588
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11589

    
11590
    feedback_fn("Removing unneeded volumes on the primary node...")
11591
    for idx, disk in enumerate(old_disks):
11592
      meta = disk.children[1]
11593
      self.cfg.SetDiskID(meta, pnode)
11594
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11595
      if msg:
11596
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11597
                        " continuing anyway: %s", idx, pnode, msg)
11598

    
11599
    # this is a DRBD disk, return its port to the pool
11600
    for disk in old_disks:
11601
      tcp_port = disk.logical_id[2]
11602
      self.cfg.AddTcpUdpPort(tcp_port)
11603

    
11604
    # Node resource locks will be released by caller
11605

    
11606
  def Exec(self, feedback_fn):
11607
    """Modifies an instance.
11608

11609
    All parameters take effect only at the next restart of the instance.
11610

11611
    """
11612
    # Process here the warnings from CheckPrereq, as we don't have a
11613
    # feedback_fn there.
11614
    for warn in self.warn:
11615
      feedback_fn("WARNING: %s" % warn)
11616

    
11617
    assert ((self.op.disk_template is None) ^
11618
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11619
      "Not owning any node resource locks"
11620

    
11621
    result = []
11622
    instance = self.instance
11623
    # disk changes
11624
    for disk_op, disk_dict in self.op.disks:
11625
      if disk_op == constants.DDM_REMOVE:
11626
        # remove the last disk
11627
        device = instance.disks.pop()
11628
        device_idx = len(instance.disks)
11629
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11630
          self.cfg.SetDiskID(disk, node)
11631
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11632
          if msg:
11633
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11634
                            " continuing anyway", device_idx, node, msg)
11635
        result.append(("disk/%d" % device_idx, "remove"))
11636

    
11637
        # if this is a DRBD disk, return its port to the pool
11638
        if device.dev_type in constants.LDS_DRBD:
11639
          tcp_port = device.logical_id[2]
11640
          self.cfg.AddTcpUdpPort(tcp_port)
11641
      elif disk_op == constants.DDM_ADD:
11642
        # add a new disk
11643
        if instance.disk_template in (constants.DT_FILE,
11644
                                        constants.DT_SHARED_FILE):
11645
          file_driver, file_path = instance.disks[0].logical_id
11646
          file_path = os.path.dirname(file_path)
11647
        else:
11648
          file_driver = file_path = None
11649
        disk_idx_base = len(instance.disks)
11650
        new_disk = _GenerateDiskTemplate(self,
11651
                                         instance.disk_template,
11652
                                         instance.name, instance.primary_node,
11653
                                         instance.secondary_nodes,
11654
                                         [disk_dict],
11655
                                         file_path,
11656
                                         file_driver,
11657
                                         disk_idx_base, feedback_fn)[0]
11658
        instance.disks.append(new_disk)
11659
        info = _GetInstanceInfoText(instance)
11660

    
11661
        logging.info("Creating volume %s for instance %s",
11662
                     new_disk.iv_name, instance.name)
11663
        # Note: this needs to be kept in sync with _CreateDisks
11664
        #HARDCODE
11665
        for node in instance.all_nodes:
11666
          f_create = node == instance.primary_node
11667
          try:
11668
            _CreateBlockDev(self, node, instance, new_disk,
11669
                            f_create, info, f_create)
11670
          except errors.OpExecError, err:
11671
            self.LogWarning("Failed to create volume %s (%s) on"
11672
                            " node %s: %s",
11673
                            new_disk.iv_name, new_disk, node, err)
11674
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11675
                       (new_disk.size, new_disk.mode)))
11676
      else:
11677
        # change a given disk
11678
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11679
        result.append(("disk.mode/%d" % disk_op,
11680
                       disk_dict[constants.IDISK_MODE]))
11681

    
11682
    if self.op.disk_template:
11683
      if __debug__:
11684
        check_nodes = set(instance.all_nodes)
11685
        if self.op.remote_node:
11686
          check_nodes.add(self.op.remote_node)
11687
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11688
          owned = self.owned_locks(level)
11689
          assert not (check_nodes - owned), \
11690
            ("Not owning the correct locks, owning %r, expected at least %r" %
11691
             (owned, check_nodes))
11692

    
11693
      r_shut = _ShutdownInstanceDisks(self, instance)
11694
      if not r_shut:
11695
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11696
                                 " proceed with disk template conversion")
11697
      mode = (instance.disk_template, self.op.disk_template)
11698
      try:
11699
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11700
      except:
11701
        self.cfg.ReleaseDRBDMinors(instance.name)
11702
        raise
11703
      result.append(("disk_template", self.op.disk_template))
11704

    
11705
      assert instance.disk_template == self.op.disk_template, \
11706
        ("Expected disk template '%s', found '%s'" %
11707
         (self.op.disk_template, instance.disk_template))
11708

    
11709
    # Release node and resource locks if there are any (they might already have
11710
    # been released during disk conversion)
11711
    _ReleaseLocks(self, locking.LEVEL_NODE)
11712
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11713

    
11714
    # NIC changes
11715
    for nic_op, nic_dict in self.op.nics:
11716
      if nic_op == constants.DDM_REMOVE:
11717
        # remove the last nic
11718
        del instance.nics[-1]
11719
        result.append(("nic.%d" % len(instance.nics), "remove"))
11720
      elif nic_op == constants.DDM_ADD:
11721
        # mac and bridge should be set by now
11722
        mac = nic_dict[constants.INIC_MAC]
11723
        ip = nic_dict.get(constants.INIC_IP, None)
11724
        nicparams = self.nic_pinst[constants.DDM_ADD]
11725
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11726
        instance.nics.append(new_nic)
11727
        result.append(("nic.%d" % (len(instance.nics) - 1),
11728
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11729
                       (new_nic.mac, new_nic.ip,
11730
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11731
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11732
                       )))
11733
      else:
11734
        for key in (constants.INIC_MAC, constants.INIC_IP):
11735
          if key in nic_dict:
11736
            setattr(instance.nics[nic_op], key, nic_dict[key])
11737
        if nic_op in self.nic_pinst:
11738
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11739
        for key, val in nic_dict.iteritems():
11740
          result.append(("nic.%s/%d" % (key, nic_op), val))
11741

    
11742
    # hvparams changes
11743
    if self.op.hvparams:
11744
      instance.hvparams = self.hv_inst
11745
      for key, val in self.op.hvparams.iteritems():
11746
        result.append(("hv/%s" % key, val))
11747

    
11748
    # beparams changes
11749
    if self.op.beparams:
11750
      instance.beparams = self.be_inst
11751
      for key, val in self.op.beparams.iteritems():
11752
        result.append(("be/%s" % key, val))
11753

    
11754
    # OS change
11755
    if self.op.os_name:
11756
      instance.os = self.op.os_name
11757

    
11758
    # osparams changes
11759
    if self.op.osparams:
11760
      instance.osparams = self.os_inst
11761
      for key, val in self.op.osparams.iteritems():
11762
        result.append(("os/%s" % key, val))
11763

    
11764
    # online/offline instance
11765
    if self.op.online_inst:
11766
      self.cfg.MarkInstanceDown(instance.name)
11767
      result.append(("admin_state", constants.ADMINST_DOWN))
11768
    if self.op.offline_inst:
11769
      self.cfg.MarkInstanceOffline(instance.name)
11770
      result.append(("admin_state", constants.ADMINST_OFFLINE))
11771

    
11772
    self.cfg.Update(instance, feedback_fn)
11773

    
11774
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11775
                self.owned_locks(locking.LEVEL_NODE)), \
11776
      "All node locks should have been released by now"
11777

    
11778
    return result
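  # Map of (current disk template, requested disk template) to the helper
  # implementing that conversion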
  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
11787
  HPATH = "instance-change-group"
11788
  HTYPE = constants.HTYPE_INSTANCE
11789
  REQ_BGL = False
11790

    
11791
  def ExpandNames(self):
11792
    self.share_locks = _ShareAll()
11793
    self.needed_locks = {
11794
      locking.LEVEL_NODEGROUP: [],
11795
      locking.LEVEL_NODE: [],
11796
      }
11797

    
11798
    self._ExpandAndLockInstance()
11799

    
11800
    if self.op.target_groups:
11801
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11802
                                  self.op.target_groups)
11803
    else:
11804
      self.req_target_uuids = None
11805

    
11806
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11807

    
11808
  def DeclareLocks(self, level):
11809
    if level == locking.LEVEL_NODEGROUP:
11810
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11811

    
11812
      if self.req_target_uuids:
11813
        lock_groups = set(self.req_target_uuids)
11814

    
11815
        # Lock all groups used by instance optimistically; this requires going
11816
        # via the node before it's locked, requiring verification later on
11817
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11818
        lock_groups.update(instance_groups)
11819
      else:
11820
        # No target groups, need to lock all of them
11821
        lock_groups = locking.ALL_SET
11822

    
11823
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11824

    
11825
    elif level == locking.LEVEL_NODE:
11826
      if self.req_target_uuids:
11827
        # Lock all nodes used by instances
11828
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11829
        self._LockInstancesNodes()
11830

    
11831
        # Lock all nodes in all potential target groups
11832
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11833
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11834
        member_nodes = [node_name
11835
                        for group in lock_groups
11836
                        for node_name in self.cfg.GetNodeGroup(group).members]
11837
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11838
      else:
11839
        # Lock all nodes as all groups are potential targets
11840
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11841

    
11842
  def CheckPrereq(self):
11843
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11844
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11845
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11846

    
11847
    assert (self.req_target_uuids is None or
11848
            owned_groups.issuperset(self.req_target_uuids))
11849
    assert owned_instances == set([self.op.instance_name])
11850

    
11851
    # Get instance information
11852
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11853

    
11854
    # Check if node groups for locked instance are still correct
11855
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11856
      ("Instance %s's nodes changed while we kept the lock" %
11857
       self.op.instance_name)
11858

    
11859
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11860
                                           owned_groups)
11861

    
11862
    if self.req_target_uuids:
11863
      # User requested specific target groups
11864
      self.target_uuids = self.req_target_uuids
11865
    else:
11866
      # All groups except those used by the instance are potential targets
11867
      self.target_uuids = owned_groups - inst_groups
11868

    
11869
    conflicting_groups = self.target_uuids & inst_groups
11870
    if conflicting_groups:
11871
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11872
                                 " used by the instance '%s'" %
11873
                                 (utils.CommaJoin(conflicting_groups),
11874
                                  self.op.instance_name),
11875
                                 errors.ECODE_INVAL)
11876

    
11877
    if not self.target_uuids:
11878
      raise errors.OpPrereqError("There are no possible target groups",
11879
                                 errors.ECODE_INVAL)
11880

    
11881
  def BuildHooksEnv(self):
11882
    """Build hooks env.
11883

11884
    """
11885
    assert self.target_uuids
11886

    
11887
    env = {
11888
      "TARGET_GROUPS": " ".join(self.target_uuids),
11889
      }
11890

    
11891
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11892

    
11893
    return env
11894

    
11895
  def BuildHooksNodes(self):
11896
    """Build hooks nodes.
11897

11898
    """
11899
    mn = self.cfg.GetMasterNode()
11900
    return ([mn], [mn])
11901

    
11902
  def Exec(self, feedback_fn):
11903
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11904

    
11905
    assert instances == [self.op.instance_name], "Instance not locked"
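    # Ask the instance allocator for a set of jobs that move the instance
    # into one of the requested target groups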
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11908
                     instances=instances, target_groups=list(self.target_uuids))
11909

    
11910
    ial.Run(self.op.iallocator)
11911

    
11912
    if not ial.success:
11913
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11914
                                 " instance '%s' using iallocator '%s': %s" %
11915
                                 (self.op.instance_name, self.op.iallocator,
11916
                                  ial.info),
11917
                                 errors.ECODE_NORES)
11918

    
11919
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11920

    
11921
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11922
                 " instance '%s'", len(jobs), self.op.instance_name)
11923

    
11924
    return ResultWithJobs(jobs)
11925

    
11926

    
11927
class LUBackupQuery(NoHooksLU):
11928
  """Query the exports list
11929

11930
  """
11931
  REQ_BGL = False
11932

    
11933
  def ExpandNames(self):
11934
    self.needed_locks = {}
11935
    self.share_locks[locking.LEVEL_NODE] = 1
11936
    if not self.op.nodes:
11937
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11938
    else:
11939
      self.needed_locks[locking.LEVEL_NODE] = \
11940
        _GetWantedNodes(self, self.op.nodes)
11941

    
11942
  def Exec(self, feedback_fn):
11943
    """Compute the list of all the exported system images.
11944

11945
    @rtype: dict
11946
    @return: a dictionary with the structure node->(export-list)
11947
        where export-list is a list of the instances exported on
11948
        that node.
11949

11950
    """
11951
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11952
    rpcresult = self.rpc.call_export_list(self.nodes)
11953
    result = {}
11954
    for node in rpcresult:
11955
      if rpcresult[node].fail_msg:
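        # a False value marks a node whose export list could not be retrieved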
11956
        result[node] = False
11957
      else:
11958
        result[node] = rpcresult[node].payload
11959

    
11960
    return result
11961

    
11962

    
11963
class LUBackupPrepare(NoHooksLU):
11964
  """Prepares an instance for an export and returns useful information.
11965

11966
  """
11967
  REQ_BGL = False
11968

    
11969
  def ExpandNames(self):
11970
    self._ExpandAndLockInstance()
11971

    
11972
  def CheckPrereq(self):
11973
    """Check prerequisites.
11974

11975
    """
11976
    instance_name = self.op.instance_name
11977

    
11978
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11979
    assert self.instance is not None, \
11980
          "Cannot retrieve locked instance %s" % self.op.instance_name
11981
    _CheckNodeOnline(self, self.instance.primary_node)
11982

    
11983
    self._cds = _GetClusterDomainSecret()
11984

    
11985
  def Exec(self, feedback_fn):
11986
    """Prepares an instance for an export.
11987

11988
    """
11989
    instance = self.instance
11990

    
11991
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11992
      salt = utils.GenerateSecret(8)
11993

    
11994
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11995
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11996
                                              constants.RIE_CERT_VALIDITY)
11997
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11998

    
11999
      (name, cert_pem) = result.payload
12000

    
12001
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
12002
                                             cert_pem)
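      # Both the key name and the CA are HMAC-signed with the cluster domain
      # secret, so they can later be verified against it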
      return {
12005
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
12006
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
12007
                          salt),
12008
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
12009
        }
12010

    
12011
    return None
12012

    
12013

    
12014
class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


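# Illustrative sketch only (not part of the opcode API, helper name made up):
# LUBackupExport.Exec above returns a (fin_resu, dresults) pair, where
# dresults holds one boolean per instance disk. A caller could summarize it
# roughly as follows.
def _ExampleSummarizeExportResult(fin_resu, dresults):
  """Returns a short human-readable summary of an export result pair.

  """
  if fin_resu and compat.all(dresults):
    return "export successful for all %d disk(s)" % len(dresults)
  failed = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                           if not dsk)
  return ("export problems (finalized: %s, failed disks: %s)" %
          (fin_resu, failed or "none"))

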
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as a
      consequence of this change, and a list of instances that were previously
      split and this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))


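# Illustrative sketch only (never called, all names and UUIDs made up): a
# minimal way to exercise CheckAssignmentForSplitInstances above with stand-in
# objects providing just the attributes the check reads.
def _ExampleSplitCheck():
  """Shows split detection for a DRBD instance spanning two nodes.

  """
  class _FakeNode:
    def __init__(self, group):
      self.group = group

  class _FakeInstance:
    def __init__(self, name, pnode, snodes):
      self.name = name
      self.primary_node = pnode
      self.secondary_nodes = snodes
      self.disk_template = constants.DT_DRBD8

  node_data = {"node1": _FakeNode("uuid-a"), "node2": _FakeNode("uuid-a")}
  instance_data = {"inst1": _FakeInstance("inst1", "node1", ["node2"])}
  # Moving only node2 into another group would newly split inst1:
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("node2", "uuid-b")], node_data, instance_data)

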
class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


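# Note on the mappings built by _GroupQuery._GetQueryData above (shapes are
# illustrative, values made up):
#   group_to_nodes:     {"group-uuid-1": ["node1", "node2"], ...}
#   group_to_instances: {"group-uuid-1": ["inst1.example.com"], ...}
# An instance is attributed to the group of its primary node only, since the
# lookup goes through instance.primary_node.

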
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This will raise errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


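# Illustrative note on LUTagsSearch.Exec above (values made up): a pattern of
# "^web" would match a tag such as "webserver" wherever it is set and be
# reported as (path, tag) pairs, e.g. ("/cluster", "webserver") or
# ("/instances/inst1.example.com", "webserver").

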
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


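# Illustrative note on LUTestDelay.Exec above: with repeat=3 the delay runs
# three times and logs "Test delay iteration 0/2", "1/2" and "2/2", while
# repeat=0 runs it exactly once without the per-iteration log message.

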
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
13367
  """IAllocator framework.
13368

13369
  An IAllocator instance has three sets of attributes:
13370
    - cfg that is needed to query the cluster
13371
    - input data (all members of the _KEYS class attribute are required)
13372
    - four buffer attributes (in|out_data|text), that represent the
13373
      input (to the external script) in text and data structure format,
13374
      and the output from it, again in two formats
13375
    - the result variables from the script (success, info, nodes) for
13376
      easy usage
13377

13378
  """
13379
  # pylint: disable=R0902
13380
  # lots of instance attributes
13381

    
13382
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13383
    self.cfg = cfg
13384
    self.rpc = rpc_runner
13385
    # init buffer variables
13386
    self.in_text = self.out_text = self.in_data = self.out_data = None
13387
    # init all input fields so that pylint is happy
13388
    self.mode = mode
13389
    self.memory = self.disks = self.disk_template = None
13390
    self.os = self.tags = self.nics = self.vcpus = None
13391
    self.hypervisor = None
13392
    self.relocate_from = None
13393
    self.name = None
13394
    self.instances = None
13395
    self.evac_mode = None
13396
    self.target_groups = []
13397
    # computed fields
13398
    self.required_nodes = None
13399
    # init result fields
13400
    self.success = self.info = self.result = None
13401

    
13402
    try:
13403
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13404
    except KeyError:
13405
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13406
                                   " IAllocator" % self.mode)
13407

    
13408
    keyset = [n for (n, _) in keydata]
13409

    
13410
    for key in kwargs:
13411
      if key not in keyset:
13412
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13413
                                     " IAllocator" % key)
13414
      setattr(self, key, kwargs[key])
13415

    
13416
    for key in keyset:
13417
      if key not in kwargs:
13418
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13419
                                     " IAllocator" % key)
13420
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data
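
  # For reference, the in_data dict assembled above has this top-level shape
  # (a sketch only; see the individual _Compute* helpers for the exact
  # per-node and per-instance keys):
  #
  #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
  #    "enabled_hypervisors": [...], "nodegroups": {...},
  #    "nodes": {...}, "instances": {...}}
  #
  # _BuildInputData later adds the mode-specific "request" key.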

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
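
  # Example of the mapping returned above (illustrative UUID and values):
  #
  #   {"f4e06e0d-...": {"name": "default", "alloc_policy": "preferred"}}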

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict mapping node names to dicts of config-derived
        node attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results
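
  # Each entry of the dict returned above is keyed by node name and, at this
  # point, carries only config-derived fields, e.g. (illustrative values):
  #
  #   "node1.example.com": {"tags": [], "primary_ip": "192.0.2.10",
  #                         "secondary_ip": "198.51.100.10",
  #                         "offline": False, "drained": False,
  #                         "master_candidate": True, "group": "<group UUID>",
  #                         "master_capable": True, "vm_capable": True}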

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # dynamic node data, including memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
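
  # For online, undrained nodes the static entry from _ComputeBasicNodeData
  # is extended above with the dynamic fields "total_memory",
  # "reserved_memory", "free_memory", "total_disk", "free_disk",
  # "total_cpus", "i_pri_memory" and "i_pri_up_memory", all taken from or
  # derived from the node_info RPC payload.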

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data
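
  # Sketch of one entry of the dict returned above (illustrative values):
  #
  #   "instance1.example.com": {
  #     "tags": [], "admin_state": "up", "vcpus": 1, "memory": 512,
  #     "os": "debootstrap+default",
  #     "nodes": ["node1.example.com", "node2.example.com"],
  #     "nics": [{"mac": "aa:00:00:00:00:01", "ip": None,
  #               "mode": "bridged", "link": "xen-br0"}],
  #     "disks": [{"size": 1024, "mode": "rw"}],
  #     "disk_template": "drbd", "hypervisor": "xen-pvm"}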

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request
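
  # Example of the allocation request returned above (illustrative values;
  # disk_space_total here is 1024 plus the DRBD_META_SIZE of 128):
  #
  #   {"name": "instance1.example.com", "disk_template": "drbd",
  #    "tags": [], "os": "debootstrap+default", "vcpus": 1, "memory": 512,
  #    "disks": [{"size": 1024, "mode": "rw"}], "disk_space_total": 1152,
  #    "nics": [...], "required_nodes": 2, "hypervisor": "xen-pvm"}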

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
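
  # Example of the relocation request returned above (illustrative values):
  #
  #   {"name": "instance1.example.com", "disk_space_total": 1152,
  #    "required_nodes": 1, "relocate_from": ["node2.example.com"]}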

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)
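
  # At this point in_data holds the full structure (the cluster data plus
  # the mode-specific "request" dict) and in_text its serialized form,
  # which is what Run() passes to the external iallocator script.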

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
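
  # Each _MODE_DATA entry maps a mode to a tuple of:
  #   1. the method that builds the mode-specific "request" dict,
  #   2. the list of (input keyword, type check) pairs that __init__ and
  #      _BuildInputData enforce, and
  #   3. the validator applied to the script's "result" field.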

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
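
  # Typical use by a logical unit (a sketch only, not taken verbatim from
  # this section): build the IAllocator, call Run() with the allocator name
  # from the opcode, then check success and consume the result, e.g.:
  #
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes using iallocator"
  #                                " '%s': %s" % (self.op.iallocator,
  #                                               ial.info),
  #                                errors.ECODE_NORES)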

  def _ValidateResult(self):
    """Process the allocator results.

    This will parse the results, check their validity and, if
    successful, save them in self.out_data and the other result
    attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
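
  # A well-formed reply from the script therefore deserializes to something
  # like (sketch with made-up values):
  #
  #   {"success": True, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  #
  # with "result" checked against the mode's validator from _MODE_DATA.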
13837

    
13838
  @staticmethod
13839
  def _NodesToGroups(node2group, groups, nodes):
13840
    """Returns a list of unique group names for a list of nodes.
13841

13842
    @type node2group: dict
13843
    @param node2group: Map from node name to group UUID
13844
    @type groups: dict
13845
    @param groups: Group information
13846
    @type nodes: list
13847
    @param nodes: Node names
13848

13849
    """
13850
    result = set()
13851

    
13852
    for node in nodes:
13853
      try:
13854
        group_uuid = node2group[node]
13855
      except KeyError:
13856
        # Ignore unknown node
13857
        pass
13858
      else:
13859
        try:
13860
          group = groups[group_uuid]
13861
        except KeyError:
13862
          # Can't find group, let's use UUID
13863
          group_name = group_uuid
13864
        else:
13865
          group_name = group["name"]
13866

    
13867
        result.add(group_name)
13868

    
13869
    return sorted(result)
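
  # Example (illustrative): with node2group {"node1": "uuid-a",
  # "node2": "uuid-b"} and groups {"uuid-a": {"name": "default"}},
  # _NodesToGroups(node2group, groups, ["node1", "node2", "unknown"])
  # returns ["default", "uuid-b"]: unknown nodes are skipped and unknown
  # groups fall back to their UUID.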


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
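
# Illustrative use of the lookup above (a sketch, not taken verbatim from
# this section): a query LU resolves the implementation class for the
# requested resource and then instantiates it with the opcode's query
# arguments, e.g.:
#
#   impl_cls = _GetQueryImplementation(constants.QR_INSTANCE)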