root / lib / cmdlib.py @ b2e233a5

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62
from ganeti import rpc
63

    
64
import ganeti.masterd.instance # pylint: disable=W0611
65

    
66

    
67
#: Size of DRBD meta block device
68
DRBD_META_SIZE = 128
69

    
70
# States of instance
71
INSTANCE_UP = [constants.ADMINST_UP]
72
INSTANCE_DOWN = [constants.ADMINST_DOWN]
73
INSTANCE_OFFLINE = [constants.ADMINST_OFFLINE]
74
INSTANCE_ONLINE = [constants.ADMINST_DOWN, constants.ADMINST_UP]
75
INSTANCE_NOT_RUNNING = [constants.ADMINST_DOWN, constants.ADMINST_OFFLINE]
76

    
77

    
78
class ResultWithJobs:
79
  """Data container for LU results with jobs.
80

81
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
82
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
83
  contained in the C{jobs} attribute and include the job IDs in the opcode
84
  result.
85

86
  """
87
  def __init__(self, jobs, **kwargs):
88
    """Initializes this class.
89

90
    Additional return values can be specified as keyword arguments.
91

92
    @type jobs: list of lists of L{opcodes.OpCode}
93
    @param jobs: A list of lists of opcode objects
94

95
    """
96
    self.jobs = jobs
97
    self.other = kwargs
98

    
99
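# Illustrative sketch (not part of the original code): an LU can hand
# follow-up work back to the job queue by returning ResultWithJobs from its
# Exec method; the opcode and keyword argument below are only examples.
#
#   def Exec(self, feedback_fn):
#     jobs = [
#       [opcodes.OpInstanceStartup(instance_name="inst1.example.com")],
#       [opcodes.OpInstanceStartup(instance_name="inst2.example.com")],
#       ]
#     return ResultWithJobs(jobs, allocated="inst1.example.com")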

    
100
class LogicalUnit(object):
101
  """Logical Unit base class.
102

103
  Subclasses must follow these rules:
104
    - implement ExpandNames
105
    - implement CheckPrereq (except when tasklets are used)
106
    - implement Exec (except when tasklets are used)
107
    - implement BuildHooksEnv
108
    - implement BuildHooksNodes
109
    - redefine HPATH and HTYPE
110
    - optionally redefine their run requirements:
111
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
112

113
  Note that all commands require root permissions.
114

115
  @ivar dry_run_result: the value (if any) that will be returned to the caller
116
      in dry-run mode (signalled by opcode dry_run parameter)
117

118
  """
119
  HPATH = None
120
  HTYPE = None
121
  REQ_BGL = True
122

    
123
  def __init__(self, processor, op, context, rpc_runner):
124
    """Constructor for LogicalUnit.
125

126
    This needs to be overridden in derived classes in order to check op
127
    validity.
128

129
    """
130
    self.proc = processor
131
    self.op = op
132
    self.cfg = context.cfg
133
    self.glm = context.glm
134
    # readability alias
135
    self.owned_locks = context.glm.list_owned
136
    self.context = context
137
    self.rpc = rpc_runner
138
    # Dicts used to declare locking needs to mcpu
139
    self.needed_locks = None
140
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
141
    self.add_locks = {}
142
    self.remove_locks = {}
143
    # Used to force good behavior when calling helper functions
144
    self.recalculate_locks = {}
145
    # logging
146
    self.Log = processor.Log # pylint: disable=C0103
147
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
148
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
149
    self.LogStep = processor.LogStep # pylint: disable=C0103
150
    # support for dry-run
151
    self.dry_run_result = None
152
    # support for generic debug attribute
153
    if (not hasattr(self.op, "debug_level") or
154
        not isinstance(self.op.debug_level, int)):
155
      self.op.debug_level = 0
156

    
157
    # Tasklets
158
    self.tasklets = None
159

    
160
    # Validate opcode parameters and set defaults
161
    self.op.Validate(True)
162

    
163
    self.CheckArguments()
164

    
165
  def CheckArguments(self):
166
    """Check syntactic validity for the opcode arguments.
167

168
    This method is for doing a simple syntactic check and ensuring the
169
    validity of opcode parameters, without any cluster-related
170
    checks. While the same can be accomplished in ExpandNames and/or
171
    CheckPrereq, doing these separate is better because:
172

173
      - ExpandNames is left as purely a lock-related function
174
      - CheckPrereq is run after we have acquired locks (and possibly
175
        waited for them)
176

177
    The function is allowed to change the self.op attribute so that
178
    later methods need no longer worry about missing parameters.
179

180
    """
181
    pass
182

    
183
  def ExpandNames(self):
184
    """Expand names for this LU.
185

186
    This method is called before starting to execute the opcode, and it should
187
    update all the parameters of the opcode to their canonical form (e.g. a
188
    short node name must be fully expanded after this method has successfully
189
    completed). This way locking, hooks, logging, etc. can work correctly.
190

191
    LUs which implement this method must also populate the self.needed_locks
192
    member, as a dict with lock levels as keys, and a list of needed lock names
193
    as values. Rules:
194

195
      - use an empty dict if you don't need any lock
196
      - if you don't need any lock at a particular level omit that level
197
      - don't put anything for the BGL level
198
      - if you want all locks at a level use locking.ALL_SET as a value
199

200
    If you need to share locks (rather than acquire them exclusively) at one
201
    level you can modify self.share_locks, setting a true value (usually 1) for
202
    that level. By default locks are not shared.
203

204
    This function can also define a list of tasklets, which then will be
205
    executed in order instead of the usual LU-level CheckPrereq and Exec
206
    functions, if those are not defined by the LU.
207

208
    Examples::
209

210
      # Acquire all nodes and one instance
211
      self.needed_locks = {
212
        locking.LEVEL_NODE: locking.ALL_SET,
213
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
214
      }
215
      # Acquire just two nodes
216
      self.needed_locks = {
217
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
218
      }
219
      # Acquire no locks
220
      self.needed_locks = {} # No, you can't leave it to the default value None
221

222
    """
223
    # The implementation of this method is mandatory only if the new LU is
224
    # concurrent, so that old LUs don't need to be changed all at the same
225
    # time.
226
    if self.REQ_BGL:
227
      self.needed_locks = {} # Exclusive LUs don't need locks.
228
    else:
229
      raise NotImplementedError
230

    
231
  def DeclareLocks(self, level):
232
    """Declare LU locking needs for a level
233

234
    While most LUs can just declare their locking needs at ExpandNames time,
235
    sometimes there's the need to calculate some locks after having acquired
236
    the ones before. This function is called just before acquiring locks at a
237
    particular level, but after acquiring the ones at lower levels, and permits
238
    such calculations. It can be used to modify self.needed_locks, and by
239
    default it does nothing.
240

241
    This function is only called if you have something already set in
242
    self.needed_locks for the level.
243

244
    @param level: Locking level which is going to be locked
245
    @type level: member of ganeti.locking.LEVELS
246

247
    """
248

    
249
  def CheckPrereq(self):
250
    """Check prerequisites for this LU.
251

252
    This method should check that the prerequisites for the execution
253
    of this LU are fulfilled. It can do internode communication, but
254
    it should be idempotent - no cluster or system changes are
255
    allowed.
256

257
    The method should raise errors.OpPrereqError in case something is
258
    not fulfilled. Its return value is ignored.
259

260
    This method should also update all the parameters of the opcode to
261
    their canonical form if it hasn't been done by ExpandNames before.
262

263
    """
264
    if self.tasklets is not None:
265
      for (idx, tl) in enumerate(self.tasklets):
266
        logging.debug("Checking prerequisites for tasklet %s/%s",
267
                      idx + 1, len(self.tasklets))
268
        tl.CheckPrereq()
269
    else:
270
      pass
271

    
272
  def Exec(self, feedback_fn):
273
    """Execute the LU.
274

275
    This method should implement the actual work. It should raise
276
    errors.OpExecError for failures that are somewhat dealt with in
277
    code, or expected.
278

279
    """
280
    if self.tasklets is not None:
281
      for (idx, tl) in enumerate(self.tasklets):
282
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
283
        tl.Exec(feedback_fn)
284
    else:
285
      raise NotImplementedError
286

    
287
  def BuildHooksEnv(self):
288
    """Build hooks environment for this LU.
289

290
    @rtype: dict
291
    @return: Dictionary containing the environment that will be used for
292
      running the hooks for this LU. The keys of the dict must not be prefixed
293
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
294
      will extend the environment with additional variables. If no environment
295
      should be defined, an empty dictionary should be returned (not C{None}).
296
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
297
      will not be called.
298

299
    """
300
    raise NotImplementedError
301

    
302
  def BuildHooksNodes(self):
303
    """Build list of nodes to run LU's hooks.
304

305
    @rtype: tuple; (list, list)
306
    @return: Tuple containing a list of node names on which the hook
307
      should run before the execution and a list of node names on which the
308
      hook should run after the execution. 'No nodes' must be returned as an
309
      empty list (and not None).
310
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
311
      will not be called.
312

313
    """
314
    raise NotImplementedError
315

    
316
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
317
    """Notify the LU about the results of its hooks.
318

319
    This method is called every time a hooks phase is executed, and notifies
320
    the Logical Unit about the hooks' result. The LU can then use it to alter
321
    its result based on the hooks.  By default the method does nothing and the
322
    previous result is passed back unchanged, but any LU can override it if it
323
    wants to use the local cluster hook-scripts somehow.
324

325
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
326
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
327
    @param hook_results: the results of the multi-node hooks rpc call
328
    @param feedback_fn: function used to send feedback back to the caller
329
    @param lu_result: the previous Exec result this LU had, or None
330
        in the PRE phase
331
    @return: the new Exec result, based on the previous result
332
        and hook results
333

334
    """
335
    # API must be kept, thus we ignore the unused-argument and
336
    # could-be-a-function pylint warnings
337
    # pylint: disable=W0613,R0201
338
    return lu_result
339

    
340
  def _ExpandAndLockInstance(self):
341
    """Helper function to expand and lock an instance.
342

343
    Many LUs that work on an instance take its name in self.op.instance_name
344
    and need to expand it and then declare the expanded name for locking. This
345
    function does it, and then updates self.op.instance_name to the expanded
346
    name. It also initializes needed_locks as a dict, if this hasn't been done
347
    before.
348

349
    """
350
    if self.needed_locks is None:
351
      self.needed_locks = {}
352
    else:
353
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
354
        "_ExpandAndLockInstance called with instance-level locks set"
355
    self.op.instance_name = _ExpandInstanceName(self.cfg,
356
                                                self.op.instance_name)
357
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
358

    
359
  def _LockInstancesNodes(self, primary_only=False,
360
                          level=locking.LEVEL_NODE):
361
    """Helper function to declare instances' nodes for locking.
362

363
    This function should be called after locking one or more instances to lock
364
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
365
    with all primary or secondary nodes for instances already locked and
366
    present in self.needed_locks[locking.LEVEL_INSTANCE].
367

368
    It should be called from DeclareLocks, and for safety only works if
369
    self.recalculate_locks[locking.LEVEL_NODE] is set.
370

371
    In the future it may grow parameters to just lock some instance's nodes, or
372
    to just lock primaries or secondary nodes, if needed.
373

374
    It should be called in DeclareLocks in a way similar to::
375

376
      if level == locking.LEVEL_NODE:
377
        self._LockInstancesNodes()
378

379
    @type primary_only: boolean
380
    @param primary_only: only lock primary nodes of locked instances
381
    @param level: Which lock level to use for locking nodes
382

383
    """
384
    assert level in self.recalculate_locks, \
385
      "_LockInstancesNodes helper function called with no nodes to recalculate"
386

    
387
    # TODO: check if we've really been called with the instance locks held
388

    
389
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
390
    # future we might want to have different behaviors depending on the value
391
    # of self.recalculate_locks[locking.LEVEL_NODE]
392
    wanted_nodes = []
393
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
394
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
395
      wanted_nodes.append(instance.primary_node)
396
      if not primary_only:
397
        wanted_nodes.extend(instance.secondary_nodes)
398

    
399
    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
400
      self.needed_locks[level] = wanted_nodes
401
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
402
      self.needed_locks[level].extend(wanted_nodes)
403
    else:
404
      raise errors.ProgrammerError("Unknown recalculation mode")
405

    
406
    del self.recalculate_locks[level]
407
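  # Illustrative sketch (not part of the original code): a typical
  # single-instance LU combines the two helpers above in its locking
  # methods, e.g.:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()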

    
408

    
409
class NoHooksLU(LogicalUnit): # pylint: disable=W0223
410
  """Simple LU which runs no hooks.
411

412
  This LU is intended as a parent for other LogicalUnits which will
413
  run no hooks, in order to reduce duplicate code.
414

415
  """
416
  HPATH = None
417
  HTYPE = None
418

    
419
  def BuildHooksEnv(self):
420
    """Empty BuildHooksEnv for NoHooksLu.
421

422
    This just raises an error.
423

424
    """
425
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
426

    
427
  def BuildHooksNodes(self):
428
    """Empty BuildHooksNodes for NoHooksLU.
429

430
    """
431
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
432

    
433

    
434
class Tasklet:
435
  """Tasklet base class.
436

437
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
438
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
439
  tasklets know nothing about locks.
440

441
  Subclasses must follow these rules:
442
    - Implement CheckPrereq
443
    - Implement Exec
444

445
  """
446
  def __init__(self, lu):
447
    self.lu = lu
448

    
449
    # Shortcuts
450
    self.cfg = lu.cfg
451
    self.rpc = lu.rpc
452

    
453
  def CheckPrereq(self):
454
    """Check prerequisites for this tasklets.
455

456
    This method should check whether the prerequisites for the execution of
457
    this tasklet are fulfilled. It can do internode communication, but it
458
    should be idempotent - no cluster or system changes are allowed.
459

460
    The method should raise errors.OpPrereqError in case something is not
461
    fulfilled. Its return value is ignored.
462

463
    This method should also update all parameters to their canonical form if it
464
    hasn't been done before.
465

466
    """
467
    pass
468

    
469
  def Exec(self, feedback_fn):
470
    """Execute the tasklet.
471

472
    This method should implement the actual work. It should raise
473
    errors.OpExecError for failures that are somewhat dealt with in code, or
474
    expected.
475

476
    """
477
    raise NotImplementedError
478

    
479
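# Illustrative sketch (not part of the original code): an LU built on
# tasklets only has to populate self.tasklets in ExpandNames; CheckPrereq
# and Exec are then inherited from LogicalUnit, which iterates over the
# tasklets.  "_MyTasklet" is a hypothetical Tasklet subclass.
#
#   def ExpandNames(self):
#     ...
#     self.tasklets = [_MyTasklet(self, name) for name in wanted_names]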

    
480
class _QueryBase:
481
  """Base for query utility classes.
482

483
  """
484
  #: Attribute holding field definitions
485
  FIELDS = None
486

    
487
  def __init__(self, qfilter, fields, use_locking):
488
    """Initializes this class.
489

490
    """
491
    self.use_locking = use_locking
492

    
493
    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
494
                             namefield="name")
495
    self.requested_data = self.query.RequestedData()
496
    self.names = self.query.RequestedNames()
497

    
498
    # Sort only if no names were requested
499
    self.sort_by_name = not self.names
500

    
501
    self.do_locking = None
502
    self.wanted = None
503

    
504
  def _GetNames(self, lu, all_names, lock_level):
505
    """Helper function to determine names asked for in the query.
506

507
    """
508
    if self.do_locking:
509
      names = lu.owned_locks(lock_level)
510
    else:
511
      names = all_names
512

    
513
    if self.wanted == locking.ALL_SET:
514
      assert not self.names
515
      # caller didn't specify names, so ordering is not important
516
      return utils.NiceSort(names)
517

    
518
    # caller specified names and we must keep the same order
519
    assert self.names
520
    assert not self.do_locking or lu.glm.is_owned(lock_level)
521

    
522
    missing = set(self.wanted).difference(names)
523
    if missing:
524
      raise errors.OpExecError("Some items were removed before retrieving"
525
                               " their data: %s" % missing)
526

    
527
    # Return expanded names
528
    return self.wanted
529

    
530
  def ExpandNames(self, lu):
531
    """Expand names for this query.
532

533
    See L{LogicalUnit.ExpandNames}.
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def DeclareLocks(self, lu, level):
539
    """Declare locks for this query.
540

541
    See L{LogicalUnit.DeclareLocks}.
542

543
    """
544
    raise NotImplementedError()
545

    
546
  def _GetQueryData(self, lu):
547
    """Collects all data for this query.
548

549
    @return: Query data object
550

551
    """
552
    raise NotImplementedError()
553

    
554
  def NewStyleQuery(self, lu):
555
    """Collect data and execute query.
556

557
    """
558
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
559
                                  sort_by_name=self.sort_by_name)
560

    
561
  def OldStyleQuery(self, lu):
562
    """Collect data and execute query.
563

564
    """
565
    return self.query.OldStyleQuery(self._GetQueryData(lu),
566
                                    sort_by_name=self.sort_by_name)
567

    
568

    
569
def _ShareAll():
570
  """Returns a dict declaring all lock levels shared.
571

572
  """
573
  return dict.fromkeys(locking.LEVELS, 1)
574

    
575

    
576
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
577
  """Checks if the owned node groups are still correct for an instance.
578

579
  @type cfg: L{config.ConfigWriter}
580
  @param cfg: The cluster configuration
581
  @type instance_name: string
582
  @param instance_name: Instance name
583
  @type owned_groups: set or frozenset
584
  @param owned_groups: List of currently owned node groups
585

586
  """
587
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
588

    
589
  if not owned_groups.issuperset(inst_groups):
590
    raise errors.OpPrereqError("Instance %s's node groups changed since"
591
                               " locks were acquired, current groups are"
592
                               " are '%s', owning groups '%s'; retry the"
593
                               " operation" %
594
                               (instance_name,
595
                                utils.CommaJoin(inst_groups),
596
                                utils.CommaJoin(owned_groups)),
597
                               errors.ECODE_STATE)
598

    
599
  return inst_groups
600

    
601

    
602
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
603
  """Checks if the instances in a node group are still correct.
604

605
  @type cfg: L{config.ConfigWriter}
606
  @param cfg: The cluster configuration
607
  @type group_uuid: string
608
  @param group_uuid: Node group UUID
609
  @type owned_instances: set or frozenset
610
  @param owned_instances: List of currently owned instances
611

612
  """
613
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
614
  if owned_instances != wanted_instances:
615
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
616
                               " locks were acquired, wanted '%s', have '%s';"
617
                               " retry the operation" %
618
                               (group_uuid,
619
                                utils.CommaJoin(wanted_instances),
620
                                utils.CommaJoin(owned_instances)),
621
                               errors.ECODE_STATE)
622

    
623
  return wanted_instances
624

    
625

    
626
def _SupportsOob(cfg, node):
627
  """Tells if node supports OOB.
628

629
  @type cfg: L{config.ConfigWriter}
630
  @param cfg: The cluster configuration
631
  @type node: L{objects.Node}
632
  @param node: The node
633
  @return: The OOB script if supported or an empty string otherwise
634

635
  """
636
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
637

    
638

    
639
def _GetWantedNodes(lu, nodes):
640
  """Returns list of checked and expanded node names.
641

642
  @type lu: L{LogicalUnit}
643
  @param lu: the logical unit on whose behalf we execute
644
  @type nodes: list
645
  @param nodes: list of node names or None for all nodes
646
  @rtype: list
647
  @return: the list of nodes, sorted
648
  @raise errors.ProgrammerError: if the nodes parameter is of the wrong type
649

650
  """
651
  if nodes:
652
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
653

    
654
  return utils.NiceSort(lu.cfg.GetNodeList())
655

    
656

    
657
def _GetWantedInstances(lu, instances):
658
  """Returns list of checked and expanded instance names.
659

660
  @type lu: L{LogicalUnit}
661
  @param lu: the logical unit on whose behalf we execute
662
  @type instances: list
663
  @param instances: list of instance names or None for all instances
664
  @rtype: list
665
  @return: the list of instances, sorted
666
  @raise errors.OpPrereqError: if the instances parameter is wrong type
667
  @raise errors.OpPrereqError: if any of the passed instances is not found
668

669
  """
670
  if instances:
671
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
672
  else:
673
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
674
  return wanted
675

    
676

    
677
def _GetUpdatedParams(old_params, update_dict,
678
                      use_default=True, use_none=False):
679
  """Return the new version of a parameter dictionary.
680

681
  @type old_params: dict
682
  @param old_params: old parameters
683
  @type update_dict: dict
684
  @param update_dict: dict containing new parameter values, or
685
      constants.VALUE_DEFAULT to reset the parameter to its default
686
      value
687
  @type use_default: boolean
688
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
689
      values as 'to be deleted' values
690
  @type use_none: boolean
691
  @param use_none: whether to recognise C{None} values as 'to be
692
      deleted' values
693
  @rtype: dict
694
  @return: the new parameter dictionary
695

696
  """
697
  params_copy = copy.deepcopy(old_params)
698
  for key, val in update_dict.iteritems():
699
    if ((use_default and val == constants.VALUE_DEFAULT) or
700
        (use_none and val is None)):
701
      try:
702
        del params_copy[key]
703
      except KeyError:
704
        pass
705
    else:
706
      params_copy[key] = val
707
  return params_copy
708
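# Illustrative sketch (not part of the original code) of how
# _GetUpdatedParams merges parameter dictionaries; the parameter names are
# made up:
#
#   old = {"vcpus": 2, "memory": 512}
#   update = {"memory": constants.VALUE_DEFAULT, "nic_type": "paravirtual"}
#   _GetUpdatedParams(old, update)
#   --> {"vcpus": 2, "nic_type": "paravirtual"}
#
# "memory" is dropped because VALUE_DEFAULT means "reset to the default";
# with use_none=True a value of None is treated the same way.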

    
709

    
710
def _ReleaseLocks(lu, level, names=None, keep=None):
711
  """Releases locks owned by an LU.
712

713
  @type lu: L{LogicalUnit}
714
  @param level: Lock level
715
  @type names: list or None
716
  @param names: Names of locks to release
717
  @type keep: list or None
718
  @param keep: Names of locks to retain
719

720
  """
721
  assert not (keep is not None and names is not None), \
722
         "Only one of the 'names' and the 'keep' parameters can be given"
723

    
724
  if names is not None:
725
    should_release = names.__contains__
726
  elif keep:
727
    should_release = lambda name: name not in keep
728
  else:
729
    should_release = None
730

    
731
  owned = lu.owned_locks(level)
732
  if not owned:
733
    # Not owning any lock at this level, do nothing
734
    pass
735

    
736
  elif should_release:
737
    retain = []
738
    release = []
739

    
740
    # Determine which locks to release
741
    for name in owned:
742
      if should_release(name):
743
        release.append(name)
744
      else:
745
        retain.append(name)
746

    
747
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
748

    
749
    # Release just some locks
750
    lu.glm.release(level, names=release)
751

    
752
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
753
  else:
754
    # Release everything
755
    lu.glm.release(level)
756

    
757
    assert not lu.glm.is_owned(level), "No locks should be owned"
758
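# Illustrative sketch (not part of the original code): once an LU has
# narrowed down the nodes it really works on, surplus node locks can be
# dropped with:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=[self.instance.primary_node])
#
# At most one of "names" and "keep" may be given; with neither, all locks
# held at that level are released.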

    
759

    
760
def _MapInstanceDisksToNodes(instances):
761
  """Creates a map from (node, volume) to instance name.
762

763
  @type instances: list of L{objects.Instance}
764
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
765

766
  """
767
  return dict(((node, vol), inst.name)
768
              for inst in instances
769
              for (node, vols) in inst.MapLVsByNode().items()
770
              for vol in vols)
771
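# Illustrative sketch (not part of the original code) of the mapping built
# by _MapInstanceDisksToNodes; node, volume and instance names are made up:
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}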

    
772

    
773
def _RunPostHook(lu, node_name):
774
  """Runs the post-hook for an opcode on a single node.
775

776
  """
777
  hm = lu.proc.BuildHooksManager(lu)
778
  try:
779
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
780
  except:
781
    # pylint: disable=W0702
782
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
783

    
784

    
785
def _CheckOutputFields(static, dynamic, selected):
786
  """Checks whether all selected fields are valid.
787

788
  @type static: L{utils.FieldSet}
789
  @param static: static fields set
790
  @type dynamic: L{utils.FieldSet}
791
  @param dynamic: dynamic fields set
792

793
  """
794
  f = utils.FieldSet()
795
  f.Extend(static)
796
  f.Extend(dynamic)
797

    
798
  delta = f.NonMatching(selected)
799
  if delta:
800
    raise errors.OpPrereqError("Unknown output fields selected: %s"
801
                               % ",".join(delta), errors.ECODE_INVAL)
802

    
803

    
804
def _CheckGlobalHvParams(params):
805
  """Validates that given hypervisor params are not global ones.
806

807
  This will ensure that instances don't get customised versions of
808
  global params.
809

810
  """
811
  used_globals = constants.HVC_GLOBALS.intersection(params)
812
  if used_globals:
813
    msg = ("The following hypervisor parameters are global and cannot"
814
           " be customized at instance level, please modify them at"
815
           " cluster level: %s" % utils.CommaJoin(used_globals))
816
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
817

    
818

    
819
def _CheckNodeOnline(lu, node, msg=None):
820
  """Ensure that a given node is online.
821

822
  @param lu: the LU on behalf of which we make the check
823
  @param node: the node to check
824
  @param msg: if passed, should be a message to replace the default one
825
  @raise errors.OpPrereqError: if the node is offline
826

827
  """
828
  if msg is None:
829
    msg = "Can't use offline node"
830
  if lu.cfg.GetNodeInfo(node).offline:
831
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
832

    
833

    
834
def _CheckNodeNotDrained(lu, node):
835
  """Ensure that a given node is not drained.
836

837
  @param lu: the LU on behalf of which we make the check
838
  @param node: the node to check
839
  @raise errors.OpPrereqError: if the node is drained
840

841
  """
842
  if lu.cfg.GetNodeInfo(node).drained:
843
    raise errors.OpPrereqError("Can't use drained node %s" % node,
844
                               errors.ECODE_STATE)
845

    
846

    
847
def _CheckNodeVmCapable(lu, node):
848
  """Ensure that a given node is vm capable.
849

850
  @param lu: the LU on behalf of which we make the check
851
  @param node: the node to check
852
  @raise errors.OpPrereqError: if the node is not vm capable
853

854
  """
855
  if not lu.cfg.GetNodeInfo(node).vm_capable:
856
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
857
                               errors.ECODE_STATE)
858

    
859

    
860
def _CheckNodeHasOS(lu, node, os_name, force_variant):
861
  """Ensure that a node supports a given OS.
862

863
  @param lu: the LU on behalf of which we make the check
864
  @param node: the node to check
865
  @param os_name: the OS to query about
866
  @param force_variant: whether to ignore variant errors
867
  @raise errors.OpPrereqError: if the node does not support the OS
868

869
  """
870
  result = lu.rpc.call_os_get(node, os_name)
871
  result.Raise("OS '%s' not in supported OS list for node %s" %
872
               (os_name, node),
873
               prereq=True, ecode=errors.ECODE_INVAL)
874
  if not force_variant:
875
    _CheckOSVariant(result.payload, os_name)
876

    
877

    
878
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
879
  """Ensure that a node has the given secondary ip.
880

881
  @type lu: L{LogicalUnit}
882
  @param lu: the LU on behalf of which we make the check
883
  @type node: string
884
  @param node: the node to check
885
  @type secondary_ip: string
886
  @param secondary_ip: the ip to check
887
  @type prereq: boolean
888
  @param prereq: whether to throw a prerequisite or an execute error
889
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
890
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
891

892
  """
893
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
894
  result.Raise("Failure checking secondary ip on node %s" % node,
895
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
896
  if not result.payload:
897
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
898
           " please fix and re-run this command" % secondary_ip)
899
    if prereq:
900
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
901
    else:
902
      raise errors.OpExecError(msg)
903

    
904

    
905
def _GetClusterDomainSecret():
906
  """Reads the cluster domain secret.
907

908
  """
909
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
910
                               strict=True)
911

    
912

    
913
def _CheckInstanceState(lu, instance, req_states, msg=None):
914
  """Ensure that an instance is in one of the required states.
915

916
  @param lu: the LU on behalf of which we make the check
917
  @param instance: the instance to check
918
  @param msg: if passed, should be a message to replace the default one
919
  @raise errors.OpPrereqError: if the instance is not in the required state
920

921
  """
922
  if msg is None:
923
    msg = "can't use instance from outside %s states" % ", ".join(req_states)
924
  if instance.admin_state not in req_states:
925
    raise errors.OpPrereqError("Instance %s is marked to be %s, %s" %
926
                               (instance, instance.admin_state, msg),
927
                               errors.ECODE_STATE)
928

    
929
  if constants.ADMINST_UP not in req_states:
930
    pnode = instance.primary_node
931
    ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
932
    ins_l.Raise("Can't contact node %s for instance information" % pnode,
933
                prereq=True, ecode=errors.ECODE_ENVIRON)
934

    
935
    if instance.name in ins_l.payload:
936
      raise errors.OpPrereqError("Instance %s is running, %s" %
937
                                 (instance.name, msg), errors.ECODE_STATE)
938

    
939

    
940
def _ExpandItemName(fn, name, kind):
941
  """Expand an item name.
942

943
  @param fn: the function to use for expansion
944
  @param name: requested item name
945
  @param kind: text description ('Node' or 'Instance')
946
  @return: the resolved (full) name
947
  @raise errors.OpPrereqError: if the item is not found
948

949
  """
950
  full_name = fn(name)
951
  if full_name is None:
952
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
953
                               errors.ECODE_NOENT)
954
  return full_name
955

    
956

    
957
def _ExpandNodeName(cfg, name):
958
  """Wrapper over L{_ExpandItemName} for nodes."""
959
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
960

    
961

    
962
def _ExpandInstanceName(cfg, name):
963
  """Wrapper over L{_ExpandItemName} for instance."""
964
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
965

    
966

    
967
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
968
                          minmem, maxmem, vcpus, nics, disk_template, disks,
969
                          bep, hvp, hypervisor_name, tags):
970
  """Builds instance related env variables for hooks
971

972
  This builds the hook environment from individual variables.
973

974
  @type name: string
975
  @param name: the name of the instance
976
  @type primary_node: string
977
  @param primary_node: the name of the instance's primary node
978
  @type secondary_nodes: list
979
  @param secondary_nodes: list of secondary nodes as strings
980
  @type os_type: string
981
  @param os_type: the name of the instance's OS
982
  @type status: string
983
  @param status: the desired status of the instance
984
  @type minmem: string
985
  @param minmem: the minimum memory size of the instance
986
  @type maxmem: string
987
  @param maxmem: the maximum memory size of the instance
988
  @type vcpus: string
989
  @param vcpus: the count of VCPUs the instance has
990
  @type nics: list
991
  @param nics: list of tuples (ip, mac, mode, link) representing
992
      the NICs the instance has
993
  @type disk_template: string
994
  @param disk_template: the disk template of the instance
995
  @type disks: list
996
  @param disks: the list of (size, mode) pairs
997
  @type bep: dict
998
  @param bep: the backend parameters for the instance
999
  @type hvp: dict
1000
  @param hvp: the hypervisor parameters for the instance
1001
  @type hypervisor_name: string
1002
  @param hypervisor_name: the hypervisor for the instance
1003
  @type tags: list
1004
  @param tags: list of instance tags as strings
1005
  @rtype: dict
1006
  @return: the hook environment for this instance
1007

1008
  """
1009
  env = {
1010
    "OP_TARGET": name,
1011
    "INSTANCE_NAME": name,
1012
    "INSTANCE_PRIMARY": primary_node,
1013
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
1014
    "INSTANCE_OS_TYPE": os_type,
1015
    "INSTANCE_STATUS": status,
1016
    "INSTANCE_MINMEM": minmem,
1017
    "INSTANCE_MAXMEM": maxmem,
1018
    # TODO(2.7) remove deprecated "memory" value
1019
    "INSTANCE_MEMORY": maxmem,
1020
    "INSTANCE_VCPUS": vcpus,
1021
    "INSTANCE_DISK_TEMPLATE": disk_template,
1022
    "INSTANCE_HYPERVISOR": hypervisor_name,
1023
  }
1024
  if nics:
1025
    nic_count = len(nics)
1026
    for idx, (ip, mac, mode, link) in enumerate(nics):
1027
      if ip is None:
1028
        ip = ""
1029
      env["INSTANCE_NIC%d_IP" % idx] = ip
1030
      env["INSTANCE_NIC%d_MAC" % idx] = mac
1031
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1032
      env["INSTANCE_NIC%d_LINK" % idx] = link
1033
      if mode == constants.NIC_MODE_BRIDGED:
1034
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1035
  else:
1036
    nic_count = 0
1037

    
1038
  env["INSTANCE_NIC_COUNT"] = nic_count
1039

    
1040
  if disks:
1041
    disk_count = len(disks)
1042
    for idx, (size, mode) in enumerate(disks):
1043
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1044
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1045
  else:
1046
    disk_count = 0
1047

    
1048
  env["INSTANCE_DISK_COUNT"] = disk_count
1049

    
1050
  if not tags:
1051
    tags = []
1052

    
1053
  env["INSTANCE_TAGS"] = " ".join(tags)
1054

    
1055
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1056
    for key, value in source.items():
1057
      env["INSTANCE_%s_%s" % (kind, key)] = value
1058

    
1059
  return env
1060

    
1061

    
1062
def _NICListToTuple(lu, nics):
1063
  """Build a list of nic information tuples.
1064

1065
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1066
  value in LUInstanceQueryData.
1067

1068
  @type lu:  L{LogicalUnit}
1069
  @param lu: the logical unit on whose behalf we execute
1070
  @type nics: list of L{objects.NIC}
1071
  @param nics: list of nics to convert to hooks tuples
1072

1073
  """
1074
  hooks_nics = []
1075
  cluster = lu.cfg.GetClusterInfo()
1076
  for nic in nics:
1077
    ip = nic.ip
1078
    mac = nic.mac
1079
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1080
    mode = filled_params[constants.NIC_MODE]
1081
    link = filled_params[constants.NIC_LINK]
1082
    hooks_nics.append((ip, mac, mode, link))
1083
  return hooks_nics
1084

    
1085

    
1086
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1087
  """Builds instance related env variables for hooks from an object.
1088

1089
  @type lu: L{LogicalUnit}
1090
  @param lu: the logical unit on whose behalf we execute
1091
  @type instance: L{objects.Instance}
1092
  @param instance: the instance for which we should build the
1093
      environment
1094
  @type override: dict
1095
  @param override: dictionary with key/values that will override
1096
      our values
1097
  @rtype: dict
1098
  @return: the hook environment dictionary
1099

1100
  """
1101
  cluster = lu.cfg.GetClusterInfo()
1102
  bep = cluster.FillBE(instance)
1103
  hvp = cluster.FillHV(instance)
1104
  args = {
1105
    "name": instance.name,
1106
    "primary_node": instance.primary_node,
1107
    "secondary_nodes": instance.secondary_nodes,
1108
    "os_type": instance.os,
1109
    "status": instance.admin_state,
1110
    "maxmem": bep[constants.BE_MAXMEM],
1111
    "minmem": bep[constants.BE_MINMEM],
1112
    "vcpus": bep[constants.BE_VCPUS],
1113
    "nics": _NICListToTuple(lu, instance.nics),
1114
    "disk_template": instance.disk_template,
1115
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1116
    "bep": bep,
1117
    "hvp": hvp,
1118
    "hypervisor_name": instance.hypervisor,
1119
    "tags": instance.tags,
1120
  }
1121
  if override:
1122
    args.update(override)
1123
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142
1124

    
1125

    
1126
def _AdjustCandidatePool(lu, exceptions):
1127
  """Adjust the candidate pool after node operations.
1128

1129
  """
1130
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1131
  if mod_list:
1132
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1133
               utils.CommaJoin(node.name for node in mod_list))
1134
    for name in mod_list:
1135
      lu.context.ReaddNode(name)
1136
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1137
  if mc_now > mc_max:
1138
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1139
               (mc_now, mc_max))
1140

    
1141

    
1142
def _DecideSelfPromotion(lu, exceptions=None):
1143
  """Decide whether I should promote myself as a master candidate.
1144

1145
  """
1146
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1147
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1148
  # the new node will increase mc_max with one, so:
1149
  mc_should = min(mc_should + 1, cp_size)
1150
  return mc_now < mc_should
1151

    
1152

    
1153
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1154
  """Check that the brigdes needed by a list of nics exist.
1155

1156
  """
1157
  cluster = lu.cfg.GetClusterInfo()
1158
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1159
  brlist = [params[constants.NIC_LINK] for params in paramslist
1160
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1161
  if brlist:
1162
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1163
    result.Raise("Error checking bridges on destination node '%s'" %
1164
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1165

    
1166

    
1167
def _CheckInstanceBridgesExist(lu, instance, node=None):
1168
  """Check that the brigdes needed by an instance exist.
1169

1170
  """
1171
  if node is None:
1172
    node = instance.primary_node
1173
  _CheckNicsBridgesExist(lu, instance.nics, node)
1174

    
1175

    
1176
def _CheckOSVariant(os_obj, name):
1177
  """Check whether an OS name conforms to the os variants specification.
1178

1179
  @type os_obj: L{objects.OS}
1180
  @param os_obj: OS object to check
1181
  @type name: string
1182
  @param name: OS name passed by the user, to check for validity
1183

1184
  """
1185
  variant = objects.OS.GetVariant(name)
1186
  if not os_obj.supported_variants:
1187
    if variant:
1188
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1189
                                 " passed)" % (os_obj.name, variant),
1190
                                 errors.ECODE_INVAL)
1191
    return
1192
  if not variant:
1193
    raise errors.OpPrereqError("OS name must include a variant",
1194
                               errors.ECODE_INVAL)
1195

    
1196
  if variant not in os_obj.supported_variants:
1197
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1198

    
1199

    
1200
def _GetNodeInstancesInner(cfg, fn):
1201
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1202

    
1203

    
1204
def _GetNodeInstances(cfg, node_name):
1205
  """Returns a list of all primary and secondary instances on a node.
1206

1207
  """
1208

    
1209
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1210

    
1211

    
1212
def _GetNodePrimaryInstances(cfg, node_name):
1213
  """Returns primary instances on a node.
1214

1215
  """
1216
  return _GetNodeInstancesInner(cfg,
1217
                                lambda inst: node_name == inst.primary_node)
1218

    
1219

    
1220
def _GetNodeSecondaryInstances(cfg, node_name):
1221
  """Returns secondary instances on a node.
1222

1223
  """
1224
  return _GetNodeInstancesInner(cfg,
1225
                                lambda inst: node_name in inst.secondary_nodes)
1226

    
1227

    
1228
def _GetStorageTypeArgs(cfg, storage_type):
1229
  """Returns the arguments for a storage type.
1230

1231
  """
1232
  # Special case for file storage
1233
  if storage_type == constants.ST_FILE:
1234
    # storage.FileStorage wants a list of storage directories
1235
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1236

    
1237
  return []
1238

    
1239

    
1240
def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
1241
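  """Returns the indices of an instance's disks that are reported faulty.

  Queries the mirror status of all the instance's disks on the given node
  and collects the indices of those whose local disk status is
  L{constants.LDS_FAULTY}.

  """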
  faulty = []
1242

    
1243
  for dev in instance.disks:
1244
    cfg.SetDiskID(dev, node_name)
1245

    
1246
  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
1247
  result.Raise("Failed to get disk status from node %s" % node_name,
1248
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1249

    
1250
  for idx, bdev_status in enumerate(result.payload):
1251
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1252
      faulty.append(idx)
1253

    
1254
  return faulty
1255

    
1256

    
1257
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1258
  """Check the sanity of iallocator and node arguments and use the
1259
  cluster-wide iallocator if appropriate.
1260

1261
  Check that at most one of (iallocator, node) is specified. If none is
1262
  specified, then the LU's opcode's iallocator slot is filled with the
1263
  cluster-wide default iallocator.
1264

1265
  @type iallocator_slot: string
1266
  @param iallocator_slot: the name of the opcode iallocator slot
1267
  @type node_slot: string
1268
  @param node_slot: the name of the opcode target node slot
1269

1270
  """
1271
  node = getattr(lu.op, node_slot, None)
1272
  iallocator = getattr(lu.op, iallocator_slot, None)
1273

    
1274
  if node is not None and iallocator is not None:
1275
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1276
                               errors.ECODE_INVAL)
1277
  elif node is None and iallocator is None:
1278
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1279
    if default_iallocator:
1280
      setattr(lu.op, iallocator_slot, default_iallocator)
1281
    else:
1282
      raise errors.OpPrereqError("No iallocator or node given and no"
1283
                                 " cluster-wide default iallocator found;"
1284
                                 " please specify either an iallocator or a"
1285
                                 " node, or set a cluster-wide default"
1286
                                 " iallocator")
1287

    
1288

    
1289
def _GetDefaultIAllocator(cfg, iallocator):
1290
  """Decides on which iallocator to use.
1291

1292
  @type cfg: L{config.ConfigWriter}
1293
  @param cfg: Cluster configuration object
1294
  @type iallocator: string or None
1295
  @param iallocator: Iallocator specified in opcode
1296
  @rtype: string
1297
  @return: Iallocator name
1298

1299
  """
1300
  if not iallocator:
1301
    # Use default iallocator
1302
    iallocator = cfg.GetDefaultIAllocator()
1303

    
1304
  if not iallocator:
1305
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1306
                               " opcode nor as a cluster-wide default",
1307
                               errors.ECODE_INVAL)
1308

    
1309
  return iallocator
1310

    
1311

    
1312
class LUClusterPostInit(LogicalUnit):
1313
  """Logical unit for running hooks after cluster initialization.
1314

1315
  """
1316
  HPATH = "cluster-init"
1317
  HTYPE = constants.HTYPE_CLUSTER
1318

    
1319
  def BuildHooksEnv(self):
1320
    """Build hooks env.
1321

1322
    """
1323
    return {
1324
      "OP_TARGET": self.cfg.GetClusterName(),
1325
      }
1326

    
1327
  def BuildHooksNodes(self):
1328
    """Build hooks nodes.
1329

1330
    """
1331
    return ([], [self.cfg.GetMasterNode()])
1332

    
1333
  def Exec(self, feedback_fn):
1334
    """Nothing to do.
1335

1336
    """
1337
    return True
1338

    
1339

    
1340
class LUClusterDestroy(LogicalUnit):
1341
  """Logical unit for destroying the cluster.
1342

1343
  """
1344
  HPATH = "cluster-destroy"
1345
  HTYPE = constants.HTYPE_CLUSTER
1346

    
1347
  def BuildHooksEnv(self):
1348
    """Build hooks env.
1349

1350
    """
1351
    return {
1352
      "OP_TARGET": self.cfg.GetClusterName(),
1353
      }
1354

    
1355
  def BuildHooksNodes(self):
1356
    """Build hooks nodes.
1357

1358
    """
1359
    return ([], [])
1360

    
1361
  def CheckPrereq(self):
1362
    """Check prerequisites.
1363

1364
    This checks whether the cluster is empty.
1365

1366
    Any errors are signaled by raising errors.OpPrereqError.
1367

1368
    """
1369
    master = self.cfg.GetMasterNode()
1370

    
1371
    nodelist = self.cfg.GetNodeList()
1372
    if len(nodelist) != 1 or nodelist[0] != master:
1373
      raise errors.OpPrereqError("There are still %d node(s) in"
1374
                                 " this cluster." % (len(nodelist) - 1),
1375
                                 errors.ECODE_INVAL)
1376
    instancelist = self.cfg.GetInstanceList()
1377
    if instancelist:
1378
      raise errors.OpPrereqError("There are still %d instance(s) in"
1379
                                 " this cluster." % len(instancelist),
1380
                                 errors.ECODE_INVAL)
1381

    
1382
  def Exec(self, feedback_fn):
1383
    """Destroys the cluster.
1384

1385
    """
1386
    master_params = self.cfg.GetMasterNetworkParameters()
1387

    
1388
    # Run post hooks on master node before it's removed
1389
    _RunPostHook(self, master_params.name)
1390

    
1391
    ems = self.cfg.GetUseExternalMipScript()
1392
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
1393
                                                     master_params, ems)
1394
    result.Raise("Could not disable the master role")
1395

    
1396
    return master_params.name
1397

    
1398

    
1399
def _VerifyCertificate(filename):
1400
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1401

1402
  @type filename: string
1403
  @param filename: Path to PEM file
1404

1405
  """
1406
  try:
1407
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1408
                                           utils.ReadFile(filename))
1409
  except Exception, err: # pylint: disable=W0703
1410
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1411
            "Failed to load X509 certificate %s: %s" % (filename, err))
1412

    
1413
  (errcode, msg) = \
1414
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1415
                                constants.SSL_CERT_EXPIRATION_ERROR)
1416

    
1417
  if msg:
1418
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1419
  else:
1420
    fnamemsg = None
1421

    
1422
  if errcode is None:
1423
    return (None, fnamemsg)
1424
  elif errcode == utils.CERT_WARNING:
1425
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1426
  elif errcode == utils.CERT_ERROR:
1427
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1428

    
1429
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1430

    
1431

    
1432
def _GetAllHypervisorParameters(cluster, instances):
1433
  """Compute the set of all hypervisor parameters.
1434

1435
  @type cluster: L{objects.Cluster}
1436
  @param cluster: the cluster object
1437
  @type instances: list of L{objects.Instance}
1438
  @param instances: additional instances from which to obtain parameters
1439
  @rtype: list of (origin, hypervisor, parameters)
1440
  @return: a list with all parameters found, indicating the hypervisor they
1441
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1442

1443
  """
1444
  hvp_data = []
1445

    
1446
  for hv_name in cluster.enabled_hypervisors:
1447
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1448

    
1449
  for os_name, os_hvp in cluster.os_hvp.items():
1450
    for hv_name, hv_params in os_hvp.items():
1451
      if hv_params:
1452
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1453
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1454

    
1455
  # TODO: collapse identical parameter values in a single one
1456
  for instance in instances:
1457
    if instance.hvparams:
1458
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1459
                       cluster.FillHV(instance)))
1460

    
1461
  return hvp_data
1462

    
1463

    
1464
class _VerifyErrors(object):
1465
  """Mix-in for cluster/group verify LUs.
1466

1467
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1468
  self.op and self._feedback_fn to be available.)
1469

1470
  """
1471

    
1472
  ETYPE_FIELD = "code"
1473
  ETYPE_ERROR = "ERROR"
1474
  ETYPE_WARNING = "WARNING"
1475

    
1476
  def _Error(self, ecode, item, msg, *args, **kwargs):
1477
    """Format an error message.
1478

1479
    Based on the opcode's error_codes parameter, either format a
1480
    parseable error code, or a simpler error string.
1481

1482
    This must be called only from Exec and functions called from Exec.
1483

1484
    """
1485
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1486
    itype, etxt, _ = ecode
1487
    # first complete the msg
1488
    if args:
1489
      msg = msg % args
1490
    # then format the whole message
1491
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
1492
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1493
    else:
1494
      if item:
1495
        item = " " + item
1496
      else:
1497
        item = ""
1498
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1499
    # and finally report it via the feedback_fn
1500
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101
1501

    
1502
  def _ErrorIf(self, cond, ecode, *args, **kwargs):
1503
    """Log an error message if the passed condition is True.
1504

1505
    """
1506
    cond = (bool(cond)
1507
            or self.op.debug_simulate_errors) # pylint: disable=E1101
1508

    
1509
    # If the error code is in the list of ignored errors, demote the error to a
1510
    # warning
1511
    (_, etxt, _) = ecode
1512
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
1513
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING
1514

    
1515
    if cond:
1516
      self._Error(ecode, *args, **kwargs)
1517

    
1518
    # do not mark the operation as failed for warning-only cases
1519
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1520
      self.bad = self.bad or cond
1521

    
1522

    
1523
class LUClusterVerify(NoHooksLU):
1524
  """Submits all jobs necessary to verify the cluster.
1525

1526
  """
1527
  REQ_BGL = False
1528

    
1529
  def ExpandNames(self):
1530
    self.needed_locks = {}
1531

    
1532
  def Exec(self, feedback_fn):
1533
    jobs = []
1534

    
1535
    if self.op.group_name:
1536
      groups = [self.op.group_name]
1537
      depends_fn = lambda: None
1538
    else:
1539
      groups = self.cfg.GetNodeGroupList()
1540

    
1541
      # Verify global configuration
1542
      jobs.append([
1543
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
1544
        ])
1545

    
1546
      # Always depend on global verification
1547
      depends_fn = lambda: [(-len(jobs), [])]
1548

    
1549
    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
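    # Each hvp_data entry is a (source, hypervisor, parameters) triple as
    # built by _GetAllHypervisorParameters, e.g. ("cluster", "xen-pvm", {...})
    # (example values are illustrative).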
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)

    group_nodes = set(self.group_info.members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, constants.CV_ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, constants.CV_ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  constants.CV_ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, constants.CV_ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, constants.CV_ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

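    # Illustrative example (figures invented): if the verify RPC ran between
    # t0 and t1 and the node's merged clock reads more than
    # NODE_MAX_CLOCK_SKEW past t1, ntime_diff holds the offset from t1
    # (e.g. "153.0s") and the check below reports CV_ENODETIME.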
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, constants.CV_ENODELVM, node,
                 "Invalid character ':' in PV '%s' of VG '%s'",
                 pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, constants.CV_ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))

  def _VerifyNodeUserScripts(self, ninfo, nresult):
    """Check the results of user scripts presence and executability on the node

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name

    test = not constants.NV_USERSCRIPTS in nresult
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
                  "did not return user scripts information")

    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
    if not test:
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
                    "user scripts not present or not executable: %s" %
                    utils.CommaJoin(sorted(broken_scripts)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, constants.CV_ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, constants.CV_ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, constants.CV_ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, constants.CV_ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, constants.CV_ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_state == constants.ADMINST_UP:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_state == constants.ADMINST_UP and
               not success and not bad_snode,
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_state == constants.ADMINST_UP and
                success and bdev_status.ldisk_status == constants.LDS_FAULTY),
               constants.CV_EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      #TODO(dynmem): use MINMEM for checking
      #TODO(dynmem): also consider ballooning out other instances
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MAXMEM]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, constants.CV_ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

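  # Worked example for the N+1 check above (numbers invented): if this node
  # is secondary for two auto-balanced instances whose primary is node "p1",
  # with BE_MAXMEM of 2048 and 4096 MiB, it needs mfree >= 6144 MiB to pass,
  # otherwise CV_ENODEN1 is reported.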
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    # Define functions determining which nodes to consider for a file
    files2nodefn = [
      (files_all, None),
      (files_mc, lambda node: (node.master_candidate or
                               node.name == master_node)),
      (files_vm, lambda node: node.vm_capable),
      ]

    # Build mapping from filename to list of nodes which should have the file
    nodefiles = {}
    for (files, fn) in files2nodefn:
      if fn is None:
        filenodes = nodeinfo
      else:
        filenodes = filter(fn, nodeinfo)
      nodefiles.update((filename,
                        frozenset(map(operator.attrgetter("name"), filenodes)))
                       for filename in files)

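    # Sketch of the resulting structure (paths and node names invented):
    #   nodefiles = {
    #     "/var/lib/ganeti/config.data": frozenset(["node1", "node2"]),
    #     "/var/lib/ganeti/known_hosts": frozenset(["node1", "node2", "node3"]),
    #     }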
    assert set(nodefiles) == (files_all | files_mc | files_vm)

    fileinfo = dict((filename, {}) for filename in nodefiles)
    ignore_nodes = set()

    for node in nodeinfo:
      if node.offline:
        ignore_nodes.add(node.name)
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        ignore_nodes.add(node.name)
        continue

      # Build per-checksum mapping from filename to nodes having it
      for (filename, checksum) in node_files.items():
        assert filename in nodefiles
        fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes) - ignore_nodes

      expected_nodes = nodefiles[filename] - ignore_nodes

      # Nodes missing file
      missing_file = expected_nodes - with_file

      if filename in files_opt:
        # All or no nodes
        errorif(missing_file and missing_file != expected_nodes,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

        # Warn if a node has a file it shouldn't
        unexpected = with_file - expected_nodes
        errorif(unexpected,
                constants.CV_ECLUSTERFILECHECK, None,
                "File %s should not exist on node(s) %s",
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name,
                            instance.admin_state == constants.ADMINST_UP)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, constants.CV_ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, constants.CV_ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, constants.CV_ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, constants.CV_ENODELVM, node,
               "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
                  "rpc call to node failed (instancelist): %s",
                  utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, constants.CV_ENODEHV, node,
             "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, constants.CV_ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, constants.CV_ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, constants.CV_ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  @staticmethod
  def _SshNodeSelector(group_uuid, all_nodes):
    """Create endless iterators for all potential SSH check hosts.

    """
    nodes = [node for node in all_nodes
             if (node.group != group_uuid and
                 not node.offline)]
    keyfunc = operator.attrgetter("group")

    return map(itertools.cycle,
               [sorted(map(operator.attrgetter("name"), names))
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
                                                  keyfunc)])

  @classmethod
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
    """Choose which nodes should talk to which other nodes.

    We will make nodes contact all nodes in their group, and one node from
    every other group.

    @warning: This algorithm has a known issue if one node group is much
      smaller than others (e.g. just one node). In such a case all other
      nodes will talk to the single node.

    """
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
    sel = cls._SshNodeSelector(group_uuid, all_nodes)

    return (online_nodes,
            dict((name, sorted([i.next() for i in sel]))
                 for name in online_nodes))

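  # Rough sketch of the value returned above (node names invented):
  #   (["n1", "n2"],
  #    {"n1": ["groupB-n3", "groupC-n7"], "n2": ["groupB-n4", "groupC-n8"]})
  # i.e. this group's online nodes plus, for each of them, one peer drawn
  # from every other group via _SshNodeSelector's cycling iterators.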
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; their failure causes
    the output to be logged in the verify output and the verification to fail.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable=R0914
    feedback_fn("* Verifying group '%s'" % self.group_info.name)

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    i_offline = 0 # Count of offline instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    user_scripts = []
    if self.cfg.GetUseExternalMipScript():
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST:
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
                                  self.all_node_info.values()),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      constants.NV_USERSCRIPTS: user_scripts,
      }

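    # Informal note: node_verify_param selects which NV_* checks every node in
    # the group runs; the per-node results inspected further down are keyed by
    # the same NV_* constants.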
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")
2850

    
2851
    refos_img = None
2852

    
2853
    for node_i in node_data_list:
2854
      node = node_i.name
2855
      nimg = node_image[node]
2856

    
2857
      if node_i.offline:
2858
        if verbose:
2859
          feedback_fn("* Skipping offline node %s" % (node,))
2860
        n_offline += 1
2861
        continue
2862

    
2863
      if node == master_node:
2864
        ntype = "master"
2865
      elif node_i.master_candidate:
2866
        ntype = "master candidate"
2867
      elif node_i.drained:
2868
        ntype = "drained"
2869
        n_drained += 1
2870
      else:
2871
        ntype = "regular"
2872
      if verbose:
2873
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2874

    
2875
      msg = all_nvinfo[node].fail_msg
2876
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
2877
               msg)
2878
      if msg:
2879
        nimg.rpc_fail = True
2880
        continue
2881

    
2882
      nresult = all_nvinfo[node].payload
2883

    
2884
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2885
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2886
      self._VerifyNodeNetwork(node_i, nresult)
2887
      self._VerifyNodeUserScripts(node_i, nresult)
2888
      self._VerifyOob(node_i, nresult)
2889

    
2890
      if nimg.vm_capable:
2891
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2892
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2893
                             all_drbd_map)
2894

    
2895
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2896
        self._UpdateNodeInstances(node_i, nresult, nimg)
2897
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2898
        self._UpdateNodeOS(node_i, nresult, nimg)
2899

    
2900
        if not nimg.os_fail:
2901
          if refos_img is None:
2902
            refos_img = nimg
2903
          self._VerifyNodeOS(node_i, nimg, refos_img)
2904
        self._VerifyNodeBridges(node_i, nresult, bridges)
2905

    
2906
        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)
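        # For example, if the node reports instances "a" and "b" but only "a"
        # has this node as its primary in the configuration, "b" ends up in
        # non_primary_inst and is flagged by one of the two checks below.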
2910

    
2911
        for inst in non_primary_inst:
2912
          # FIXME: investigate best way to handle offline insts
2913
          if inst.admin_state == constants.ADMINST_OFFLINE:
2914
            if verbose:
2915
              feedback_fn("* Skipping offline instance %s" % inst.name)
2916
            i_offline += 1
2917
            continue
2918
          test = inst in self.all_inst_info
2919
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
2920
                   "instance should not run on node %s", node_i.name)
2921
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
2922
                   "node is running unknown instance %s", inst)
2923

    
2924
    for node, result in extra_lv_nvinfo.items():
2925
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2926
                              node_image[node], vg_name)
2927

    
2928
    feedback_fn("* Verifying instance status")
2929
    for instance in self.my_inst_names:
2930
      if verbose:
2931
        feedback_fn("* Verifying instance %s" % instance)
2932
      inst_config = self.my_inst_info[instance]
2933
      self._VerifyInstance(instance, inst_config, node_image,
2934
                           instdisk[instance])
2935
      inst_nodes_offline = []
2936

    
2937
      pnode = inst_config.primary_node
2938
      pnode_img = node_image[pnode]
2939
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2940
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
2941
               " primary node failed", instance)
2942

    
2943
      _ErrorIf(inst_config.admin_state == constants.ADMINST_UP and
2944
               pnode_img.offline,
2945
               constants.CV_EINSTANCEBADNODE, instance,
2946
               "instance is marked as running and lives on offline node %s",
2947
               inst_config.primary_node)
2948

    
2949
      # If the instance is non-redundant we cannot survive losing its primary
2950
      # node, so we are not N+1 compliant. On the other hand we have no disk
2951
      # templates with more than one secondary so that situation is not well
2952
      # supported either.
2953
      # FIXME: does not support file-backed instances
2954
      if not inst_config.secondary_nodes:
2955
        i_non_redundant.append(instance)
2956

    
2957
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
2958
               constants.CV_EINSTANCELAYOUT,
2959
               instance, "instance has multiple secondary nodes: %s",
2960
               utils.CommaJoin(inst_config.secondary_nodes),
2961
               code=self.ETYPE_WARNING)
2962

    
2963
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2964
        pnode = inst_config.primary_node
2965
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2966
        instance_groups = {}
2967

    
2968
        for node in instance_nodes:
2969
          instance_groups.setdefault(self.all_node_info[node].group,
2970
                                     []).append(node)
2971

    
2972
        pretty_list = [
2973
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2974
          # Sort so that we always list the primary node first.
2975
          for group, nodes in sorted(instance_groups.items(),
2976
                                     key=lambda (_, nodes): pnode in nodes,
2977
                                     reverse=True)]
2978

    
2979
        self._ErrorIf(len(instance_groups) > 1,
2980
                      constants.CV_EINSTANCESPLITGROUPS,
2981
                      instance, "instance has primary and secondary nodes in"
2982
                      " different groups: %s", utils.CommaJoin(pretty_list),
2983
                      code=self.ETYPE_WARNING)
2984

    
2985
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2986
        i_non_a_balanced.append(instance)
2987

    
2988
      for snode in inst_config.secondary_nodes:
2989
        s_img = node_image[snode]
2990
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
2991
                 snode, "instance %s, connection to secondary node failed",
2992
                 instance)
2993

    
2994
        if s_img.offline:
2995
          inst_nodes_offline.append(snode)
2996

    
2997
      # warn that the instance lives on offline nodes
2998
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
2999
               "instance has offline secondary node(s) %s",
3000
               utils.CommaJoin(inst_nodes_offline))
3001
      # ... or ghost/non-vm_capable nodes
3002
      for node in inst_config.all_nodes:
3003
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
3004
                 instance, "instance lives on ghost node %s", node)
3005
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
3006
                 instance, "instance lives on non-vm_capable node %s", node)
3007

    
3008
    feedback_fn("* Verifying orphan volumes")
3009
    reserved = utils.FieldSet(*cluster.reserved_lvs)
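    # Volumes matching the cluster's reserved_lvs patterns are skipped by
    # _VerifyOrphanVolumes instead of being reported as orphans.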
3010

    
3011
    # We will get spurious "unknown volume" warnings if any node of this group
3012
    # is secondary for an instance whose primary is in another group. To avoid
3013
    # them, we find these instances and add their volumes to node_vol_should.
3014
    for inst in self.all_inst_info.values():
3015
      for secondary in inst.secondary_nodes:
3016
        if (secondary in self.my_node_info
3017
            and inst.name not in self.my_inst_info):
3018
          inst.MapLVsByNode(node_vol_should)
3019
          break
3020

    
3021
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
3022

    
3023
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
3024
      feedback_fn("* Verifying N+1 Memory redundancy")
3025
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
3026

    
3027
    feedback_fn("* Other Notes")
3028
    if i_non_redundant:
3029
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
3030
                  % len(i_non_redundant))
3031

    
3032
    if i_non_a_balanced:
3033
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
3034
                  % len(i_non_a_balanced))
3035

    
3036
    if i_offline:
3037
      feedback_fn("  - NOTICE: %d offline instance(s) found." % i_offline)
3038

    
3039
    if n_offline:
3040
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
3041

    
3042
    if n_drained:
3043
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
3044

    
3045
    return not self.bad
3046

    
3047
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyzes the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
            for inst in self.instances.values()
            if inst.admin_state == constants.ADMINST_UP])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
3231

    
3232

    
3233
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disk sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = {
      locking.LEVEL_NODE_RES: 1,
      locking.LEVEL_INSTANCE: 0,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
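        # The sizes reported by the node are in bytes, while disk.size in the
        # configuration is kept in MiB, hence the right shift by 20 bits below.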
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
3353

    
3354

    
3355
class LUClusterRename(LogicalUnit):
3356
  """Rename the cluster.
3357

3358
  """
3359
  HPATH = "cluster-rename"
3360
  HTYPE = constants.HTYPE_CLUSTER
3361

    
3362
  def BuildHooksEnv(self):
3363
    """Build hooks env.
3364

3365
    """
3366
    return {
3367
      "OP_TARGET": self.cfg.GetClusterName(),
3368
      "NEW_NAME": self.op.name,
3369
      }
3370

    
3371
  def BuildHooksNodes(self):
3372
    """Build hooks nodes.
3373

3374
    """
3375
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3376

    
3377
  def CheckPrereq(self):
3378
    """Verify that the passed name is a valid one.
3379

3380
    """
3381
    hostname = netutils.GetHostname(name=self.op.name,
3382
                                    family=self.cfg.GetPrimaryIPFamily())
3383

    
3384
    new_name = hostname.name
3385
    self.ip = new_ip = hostname.ip
3386
    old_name = self.cfg.GetClusterName()
3387
    old_ip = self.cfg.GetMasterIP()
3388
    if new_name == old_name and new_ip == old_ip:
3389
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
3390
                                 " cluster has changed",
3391
                                 errors.ECODE_INVAL)
3392
    if new_ip != old_ip:
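      # The new address must not already answer on the network; otherwise the
      # rename would clash with an existing host.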
3393
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3394
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
3395
                                   " reachable on the network" %
3396
                                   new_ip, errors.ECODE_NOTUNIQUE)
3397

    
3398
    self.op.name = new_name
3399

    
3400
  def Exec(self, feedback_fn):
3401
    """Rename the cluster.
3402

3403
    """
3404
    clustername = self.op.name
3405
    new_ip = self.ip
3406

    
3407
    # shutdown the master IP
3408
    master_params = self.cfg.GetMasterNetworkParameters()
3409
    ems = self.cfg.GetUseExternalMipScript()
3410
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3411
                                                     master_params, ems)
3412
    result.Raise("Could not disable the master role")
3413

    
3414
    try:
3415
      cluster = self.cfg.GetClusterInfo()
3416
      cluster.cluster_name = clustername
3417
      cluster.master_ip = new_ip
3418
      self.cfg.Update(cluster, feedback_fn)
3419

    
3420
      # update the known hosts file
3421
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3422
      node_list = self.cfg.GetOnlineNodeList()
3423
      try:
3424
        node_list.remove(master_params.name)
3425
      except ValueError:
3426
        pass
3427
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3428
    finally:
3429
      master_params.ip = new_ip
3430
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3431
                                                     master_params, ems)
3432
      msg = result.fail_msg
3433
      if msg:
3434
        self.LogWarning("Could not re-enable the master role on"
3435
                        " the master, please restart manually: %s", msg)
3436

    
3437
    return clustername
3438

    
3439

    
3440
def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))
3459

    
3460

    
3461
class LUClusterSetParams(LogicalUnit):
3462
  """Change the parameters of the cluster.
3463

3464
  """
3465
  HPATH = "cluster-modify"
3466
  HTYPE = constants.HTYPE_CLUSTER
3467
  REQ_BGL = False
3468

    
3469
  def CheckArguments(self):
3470
    """Check parameters
3471

3472
    """
3473
    if self.op.uid_pool:
3474
      uidpool.CheckUidPool(self.op.uid_pool)
3475

    
3476
    if self.op.add_uids:
3477
      uidpool.CheckUidPool(self.op.add_uids)
3478

    
3479
    if self.op.remove_uids:
3480
      uidpool.CheckUidPool(self.op.remove_uids)
3481

    
3482
    if self.op.master_netmask is not None:
3483
      _ValidateNetmask(self.cfg, self.op.master_netmask)
3484

    
3485
  def ExpandNames(self):
3486
    # FIXME: in the future maybe other cluster params won't require checking on
3487
    # all nodes to be modified.
3488
    self.needed_locks = {
3489
      locking.LEVEL_NODE: locking.ALL_SET,
3490
    }
3491
    self.share_locks[locking.LEVEL_NODE] = 1
3492

    
3493
  def BuildHooksEnv(self):
3494
    """Build hooks env.
3495

3496
    """
3497
    return {
3498
      "OP_TARGET": self.cfg.GetClusterName(),
3499
      "NEW_VG_NAME": self.op.vg_name,
3500
      }
3501

    
3502
  def BuildHooksNodes(self):
3503
    """Build hooks nodes.
3504

3505
    """
3506
    mn = self.cfg.GetMasterNode()
3507
    return ([mn], [mn])
3508

    
3509
  def CheckPrereq(self):
3510
    """Check prerequisites.
3511

3512
    This checks whether the given parameters don't conflict and
    whether the given volume group is valid.
3514

3515
    """
3516
    if self.op.vg_name is not None and not self.op.vg_name:
3517
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3518
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3519
                                   " instances exist", errors.ECODE_INVAL)
3520

    
3521
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
3522
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3523
        raise errors.OpPrereqError("Cannot disable drbd helper while"
3524
                                   " drbd-based instances exist",
3525
                                   errors.ECODE_INVAL)
3526

    
3527
    node_list = self.owned_locks(locking.LEVEL_NODE)
3528

    
3529
    # if vg_name not None, checks given volume group on all nodes
3530
    if self.op.vg_name:
3531
      vglist = self.rpc.call_vg_list(node_list)
3532
      for node in node_list:
3533
        msg = vglist[node].fail_msg
3534
        if msg:
3535
          # ignoring down node
3536
          self.LogWarning("Error while gathering data on node %s"
3537
                          " (ignoring node): %s", node, msg)
3538
          continue
3539
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3540
                                              self.op.vg_name,
3541
                                              constants.MIN_VG_SIZE)
3542
        if vgstatus:
3543
          raise errors.OpPrereqError("Error on node '%s': %s" %
3544
                                     (node, vgstatus), errors.ECODE_ENVIRON)
3545

    
3546
    if self.op.drbd_helper:
3547
      # checks given drbd helper on all nodes
3548
      helpers = self.rpc.call_drbd_helper(node_list)
3549
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3550
        if ninfo.offline:
3551
          self.LogInfo("Not checking drbd helper on offline node %s", node)
3552
          continue
3553
        msg = helpers[node].fail_msg
3554
        if msg:
3555
          raise errors.OpPrereqError("Error checking drbd helper on node"
3556
                                     " '%s': %s" % (node, msg),
3557
                                     errors.ECODE_ENVIRON)
3558
        node_helper = helpers[node].payload
3559
        if node_helper != self.op.drbd_helper:
3560
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3561
                                     (node, node_helper), errors.ECODE_ENVIRON)
3562

    
3563
    self.cluster = cluster = self.cfg.GetClusterInfo()
3564
    # validate params changes
3565
    if self.op.beparams:
3566
      objects.UpgradeBeParams(self.op.beparams)
3567
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3568
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3569

    
3570
    if self.op.ndparams:
3571
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3572
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3573

    
3574
      # TODO: we need a more general way to handle resetting
3575
      # cluster-level parameters to default values
3576
      if self.new_ndparams["oob_program"] == "":
3577
        self.new_ndparams["oob_program"] = \
3578
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3579

    
3580
    if self.op.nicparams:
3581
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3582
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3583
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
3584
      nic_errors = []
3585

    
3586
      # check all instances for consistency
3587
      for instance in self.cfg.GetAllInstancesInfo().values():
3588
        for nic_idx, nic in enumerate(instance.nics):
3589
          params_copy = copy.deepcopy(nic.nicparams)
3590
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
3591

    
3592
          # check parameter syntax
3593
          try:
3594
            objects.NIC.CheckParameterSyntax(params_filled)
3595
          except errors.ConfigurationError, err:
3596
            nic_errors.append("Instance %s, nic/%d: %s" %
3597
                              (instance.name, nic_idx, err))
3598

    
3599
          # if we're moving instances to routed, check that they have an ip
3600
          target_mode = params_filled[constants.NIC_MODE]
3601
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3602
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3603
                              " address" % (instance.name, nic_idx))
3604
      if nic_errors:
3605
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3606
                                   "\n".join(nic_errors))
3607

    
3608
    # hypervisor list/parameters
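    # The requested hvparams are merged into a copy of the current cluster
    # values: hypervisors not seen before get their dict added as-is, while
    # existing ones are updated key by key.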
3609
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3610
    if self.op.hvparams:
3611
      for hv_name, hv_dict in self.op.hvparams.items():
3612
        if hv_name not in self.new_hvparams:
3613
          self.new_hvparams[hv_name] = hv_dict
3614
        else:
3615
          self.new_hvparams[hv_name].update(hv_dict)
3616

    
3617
    # os hypervisor parameters
3618
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3619
    if self.op.os_hvp:
3620
      for os_name, hvs in self.op.os_hvp.items():
3621
        if os_name not in self.new_os_hvp:
3622
          self.new_os_hvp[os_name] = hvs
3623
        else:
3624
          for hv_name, hv_dict in hvs.items():
3625
            if hv_name not in self.new_os_hvp[os_name]:
3626
              self.new_os_hvp[os_name][hv_name] = hv_dict
3627
            else:
3628
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
3629

    
3630
    # os parameters
3631
    self.new_osp = objects.FillDict(cluster.osparams, {})
3632
    if self.op.osparams:
3633
      for os_name, osp in self.op.osparams.items():
3634
        if os_name not in self.new_osp:
3635
          self.new_osp[os_name] = {}
3636

    
3637
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3638
                                                  use_none=True)
3639

    
3640
        if not self.new_osp[os_name]:
3641
          # we removed all parameters
3642
          del self.new_osp[os_name]
3643
        else:
3644
          # check the parameter validity (remote check)
3645
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3646
                         os_name, self.new_osp[os_name])
3647

    
3648
    # changes to the hypervisor list
3649
    if self.op.enabled_hypervisors is not None:
3650
      self.hv_list = self.op.enabled_hypervisors
3651
      for hv in self.hv_list:
3652
        # if the hypervisor doesn't already exist in the cluster
3653
        # hvparams, we initialize it to empty, and then (in both
3654
        # cases) we make sure to fill the defaults, as we might not
3655
        # have a complete defaults list if the hypervisor wasn't
3656
        # enabled before
3657
        if hv not in new_hvp:
3658
          new_hvp[hv] = {}
3659
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3660
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3661
    else:
3662
      self.hv_list = cluster.enabled_hypervisors
3663

    
3664
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
3665
      # either the enabled list has changed, or the parameters have, validate
3666
      for hv_name, hv_params in self.new_hvparams.items():
3667
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
3668
            (self.op.enabled_hypervisors and
3669
             hv_name in self.op.enabled_hypervisors)):
3670
          # either this is a new hypervisor, or its parameters have changed
3671
          hv_class = hypervisor.GetHypervisor(hv_name)
3672
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3673
          hv_class.CheckParameterSyntax(hv_params)
3674
          _CheckHVParams(self, node_list, hv_name, hv_params)
3675

    
3676
    if self.op.os_hvp:
3677
      # no need to check any newly-enabled hypervisors, since the
3678
      # defaults have already been checked in the above code-block
3679
      for os_name, os_hvp in self.new_os_hvp.items():
3680
        for hv_name, hv_params in os_hvp.items():
3681
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3682
          # we need to fill in the new os_hvp on top of the actual hv_p
3683
          cluster_defaults = self.new_hvparams.get(hv_name, {})
3684
          new_osp = objects.FillDict(cluster_defaults, hv_params)
3685
          hv_class = hypervisor.GetHypervisor(hv_name)
3686
          hv_class.CheckParameterSyntax(new_osp)
3687
          _CheckHVParams(self, node_list, hv_name, new_osp)
3688

    
3689
    if self.op.default_iallocator:
3690
      alloc_script = utils.FindFile(self.op.default_iallocator,
3691
                                    constants.IALLOCATOR_SEARCH_PATH,
3692
                                    os.path.isfile)
3693
      if alloc_script is None:
3694
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3695
                                   " specified" % self.op.default_iallocator,
3696
                                   errors.ECODE_INVAL)
3697

    
3698
  def Exec(self, feedback_fn):
3699
    """Change the parameters of the cluster.
3700

3701
    """
3702
    if self.op.vg_name is not None:
3703
      new_volume = self.op.vg_name
3704
      if not new_volume:
3705
        new_volume = None
3706
      if new_volume != self.cfg.GetVGName():
3707
        self.cfg.SetVGName(new_volume)
3708
      else:
3709
        feedback_fn("Cluster LVM configuration already in desired"
3710
                    " state, not changing")
3711
    if self.op.drbd_helper is not None:
3712
      new_helper = self.op.drbd_helper
3713
      if not new_helper:
3714
        new_helper = None
3715
      if new_helper != self.cfg.GetDRBDHelper():
3716
        self.cfg.SetDRBDHelper(new_helper)
3717
      else:
3718
        feedback_fn("Cluster DRBD helper already in desired state,"
3719
                    " not changing")
3720
    if self.op.hvparams:
3721
      self.cluster.hvparams = self.new_hvparams
3722
    if self.op.os_hvp:
3723
      self.cluster.os_hvp = self.new_os_hvp
3724
    if self.op.enabled_hypervisors is not None:
3725
      self.cluster.hvparams = self.new_hvparams
3726
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3727
    if self.op.beparams:
3728
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3729
    if self.op.nicparams:
3730
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3731
    if self.op.osparams:
3732
      self.cluster.osparams = self.new_osp
3733
    if self.op.ndparams:
3734
      self.cluster.ndparams = self.new_ndparams
3735

    
3736
    if self.op.candidate_pool_size is not None:
3737
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
3738
      # we need to update the pool size here, otherwise the save will fail
3739
      _AdjustCandidatePool(self, [])
3740

    
3741
    if self.op.maintain_node_health is not None:
3742
      if self.op.maintain_node_health and not constants.ENABLE_CONFD:
3743
        feedback_fn("Note: CONFD was disabled at build time, node health"
3744
                    " maintenance is not useful (still enabling it)")
3745
      self.cluster.maintain_node_health = self.op.maintain_node_health
3746

    
3747
    if self.op.prealloc_wipe_disks is not None:
3748
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3749

    
3750
    if self.op.add_uids is not None:
3751
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3752

    
3753
    if self.op.remove_uids is not None:
3754
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3755

    
3756
    if self.op.uid_pool is not None:
3757
      self.cluster.uid_pool = self.op.uid_pool
3758

    
3759
    if self.op.default_iallocator is not None:
3760
      self.cluster.default_iallocator = self.op.default_iallocator
3761

    
3762
    if self.op.reserved_lvs is not None:
3763
      self.cluster.reserved_lvs = self.op.reserved_lvs
3764

    
3765
    if self.op.use_external_mip_script is not None:
3766
      self.cluster.use_external_mip_script = self.op.use_external_mip_script
3767

    
3768
    def helper_os(aname, mods, desc):
3769
      desc += " OS list"
3770
      lst = getattr(self.cluster, aname)
3771
      for key, val in mods:
3772
        if key == constants.DDM_ADD:
3773
          if val in lst:
3774
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3775
          else:
3776
            lst.append(val)
3777
        elif key == constants.DDM_REMOVE:
3778
          if val in lst:
3779
            lst.remove(val)
3780
          else:
3781
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3782
        else:
3783
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3784

    
3785
    if self.op.hidden_os:
3786
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3787

    
3788
    if self.op.blacklisted_os:
3789
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3790

    
3791
    if self.op.master_netdev:
3792
      master_params = self.cfg.GetMasterNetworkParameters()
3793
      ems = self.cfg.GetUseExternalMipScript()
3794
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3795
                  self.cluster.master_netdev)
3796
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
3797
                                                       master_params, ems)
3798
      result.Raise("Could not disable the master ip")
3799
      feedback_fn("Changing master_netdev from %s to %s" %
3800
                  (master_params.netdev, self.op.master_netdev))
3801
      self.cluster.master_netdev = self.op.master_netdev
3802

    
3803
    if self.op.master_netmask:
3804
      master_params = self.cfg.GetMasterNetworkParameters()
3805
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
3806
      result = self.rpc.call_node_change_master_netmask(master_params.name,
3807
                                                        master_params.netmask,
3808
                                                        self.op.master_netmask,
3809
                                                        master_params.ip,
3810
                                                        master_params.netdev)
3811
      if result.fail_msg:
3812
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
3813
        feedback_fn(msg)
3814

    
3815
      self.cluster.master_netmask = self.op.master_netmask
3816

    
3817
    self.cfg.Update(self.cluster, feedback_fn)
3818

    
3819
    if self.op.master_netdev:
3820
      master_params = self.cfg.GetMasterNetworkParameters()
3821
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3822
                  self.op.master_netdev)
3823
      ems = self.cfg.GetUseExternalMipScript()
3824
      result = self.rpc.call_node_activate_master_ip(master_params.name,
3825
                                                     master_params, ems)
3826
      if result.fail_msg:
3827
        self.LogWarning("Could not re-enable the master ip on"
3828
                        " the master, please restart manually: %s",
3829
                        result.fail_msg)
3830

    
3831

    
3832
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                   master_params, ems)
    result.Raise("Could not activate the master IP")


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not deactivate the master IP")
4012

    
4013

    
4014
def _WaitForSync(lu, instance, disks=None, oneshot=False):
4015
  """Sleep and poll for an instance's disk to sync.
4016

4017
  """
4018
  if not instance.disks or disks is not None and not disks:
4019
    return True
4020

    
4021
  disks = _ExpandCheckDisks(instance, disks)
4022

    
4023
  if not oneshot:
4024
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
4025

    
4026
  node = instance.primary_node
4027

    
4028
  for dev in disks:
4029
    lu.cfg.SetDiskID(dev, node)
4030

    
4031
  # TODO: Convert to utils.Retry
4032

    
4033
  retries = 0
4034
  degr_retries = 10 # in seconds, as we sleep 1 second each time
4035
  while True:
4036
    max_time = 0
4037
    done = True
4038
    cumul_degraded = False
4039
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
4040
    msg = rstats.fail_msg
4041
    if msg:
4042
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
4043
      retries += 1
4044
      if retries >= 10:
4045
        raise errors.RemoteError("Can't contact node %s for mirror data,"
4046
                                 " aborting." % node)
4047
      time.sleep(6)
4048
      continue
4049
    rstats = rstats.payload
4050
    retries = 0
4051
    for i, mstat in enumerate(rstats):
4052
      if mstat is None:
4053
        lu.LogWarning("Can't compute data for node %s/%s",
4054
                           node, disks[i].iv_name)
4055
        continue
4056

    
4057
      cumul_degraded = (cumul_degraded or
4058
                        (mstat.is_degraded and mstat.sync_percent is None))
4059
      if mstat.sync_percent is not None:
4060
        done = False
4061
        if mstat.estimated_time is not None:
4062
          rem_time = ("%s remaining (estimated)" %
4063
                      utils.FormatSeconds(mstat.estimated_time))
4064
          max_time = mstat.estimated_time
4065
        else:
4066
          rem_time = "no time estimate"
4067
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
4068
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
4069

    
4070
    # if we're done but degraded, let's do a few small retries, to
4071
    # make sure we see a stable and not transient situation; therefore
4072
    # we force restart of the loop
4073
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
4074
      logging.info("Degraded disks found, %d retries left", degr_retries)
4075
      degr_retries -= 1
4076
      time.sleep(1)
4077
      continue
4078

    
4079
    if done or oneshot:
4080
      break
4081

    
4082
    time.sleep(min(60, max_time))
4083

    
4084
  if done:
4085
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
4086
  return not cumul_degraded
4087

    
4088

    
4089
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
4090
  """Check that mirrors are not degraded.
4091

4092
  The ldisk parameter, if True, will change the test from the
4093
  is_degraded attribute (which represents overall non-ok status for
4094
  the device(s)) to the ldisk (representing the local storage status).
4095

4096
  """
4097
  lu.cfg.SetDiskID(dev, node)
4098

    
4099
  result = True
4100

    
4101
  if on_primary or dev.AssembleOnSecondary():
4102
    rstats = lu.rpc.call_blockdev_find(node, dev)
4103
    msg = rstats.fail_msg
4104
    if msg:
4105
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
4106
      result = False
4107
    elif not rstats.payload:
4108
      lu.LogWarning("Can't find disk on node %s", node)
4109
      result = False
4110
    else:
4111
      if ldisk:
4112
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
4113
      else:
4114
        result = result and not rstats.payload.is_degraded
4115

    
4116
  if dev.children:
4117
    for child in dev.children:
4118
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
4119

    
4120
  return result
4121

    
4122

    
4123
class LUOobCommand(NoHooksLU):
4124
  """Logical unit for OOB handling.
4125

4126
  """
4127
  REG_BGL = False
4128
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
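  # Power-off and power-cycle are not applied to the master node by default,
  # presumably because the master is the node issuing the out-of-band calls;
  # CheckPrereq below either drops the master from the node list or refuses
  # the request when the master is named explicitly.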
4129

    
4130
  def ExpandNames(self):
4131
    """Gather locks we need.
4132

4133
    """
4134
    if self.op.node_names:
4135
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
4136
      lock_names = self.op.node_names
4137
    else:
4138
      lock_names = locking.ALL_SET
4139

    
4140
    self.needed_locks = {
4141
      locking.LEVEL_NODE: lock_names,
4142
      }
4143

    
4144
  def CheckPrereq(self):
4145
    """Check prerequisites.
4146

4147
    This checks:
4148
     - the node exists in the configuration
4149
     - OOB is supported
4150

4151
    Any errors are signaled by raising errors.OpPrereqError.
4152

4153
    """
4154
    self.nodes = []
4155
    self.master_node = self.cfg.GetMasterNode()
4156

    
4157
    assert self.op.power_delay >= 0.0
4158

    
4159
    if self.op.node_names:
4160
      if (self.op.command in self._SKIP_MASTER and
4161
          self.master_node in self.op.node_names):
4162
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
4163
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
4164

    
4165
        if master_oob_handler:
4166
          additional_text = ("run '%s %s %s' if you want to operate on the"
4167
                             " master regardless") % (master_oob_handler,
4168
                                                      self.op.command,
4169
                                                      self.master_node)
4170
        else:
4171
          additional_text = "it does not support out-of-band operations"
4172

    
4173
        raise errors.OpPrereqError(("Operating on the master node %s is not"
4174
                                    " allowed for %s; %s") %
4175
                                   (self.master_node, self.op.command,
4176
                                    additional_text), errors.ECODE_INVAL)
4177
    else:
4178
      self.op.node_names = self.cfg.GetNodeList()
4179
      if self.op.command in self._SKIP_MASTER:
4180
        self.op.node_names.remove(self.master_node)
4181

    
4182
    if self.op.command in self._SKIP_MASTER:
4183
      assert self.master_node not in self.op.node_names
4184

    
4185
    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
4186
      if node is None:
4187
        raise errors.OpPrereqError("Node %s not found" % node_name,
4188
                                   errors.ECODE_NOENT)
4189
      else:
4190
        self.nodes.append(node)
4191

    
4192
      if (not self.op.ignore_status and
4193
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
4194
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
4195
                                    " not marked offline") % node_name,
4196
                                   errors.ECODE_STATE)
4197

    
4198
  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

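  # Illustrative sketch (not part of the original module) of the list built
  # by Exec() above: one entry per node, starting with the node name and
  # followed by the command result; the host names below are placeholders.
  #
  #   [[(constants.RS_NORMAL, "node1.example.com"),
  #     (constants.RS_NORMAL, {constants.OOB_POWER_STATUS_POWERED: True})],
  #    [(constants.RS_NORMAL, "node2.example.com"),
  #     (constants.RS_UNAVAIL, None)]]
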
  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
4307
    # Lock all nodes in shared mode
4308
    # Temporary removal of locks, should be reverted later
4309
    # TODO: reintroduce locks when they are lighter-weight
4310
    lu.needed_locks = {}
4311
    #self.share_locks[locking.LEVEL_NODE] = 1
4312
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4313

    
4314
    # The following variables interact with _QueryBase._GetNames
4315
    if self.names:
4316
      self.wanted = self.names
4317
    else:
4318
      self.wanted = locking.ALL_SET
4319

    
4320
    self.do_locking = self.use_locking
4321

    
4322
  def DeclareLocks(self, lu, level):
4323
    pass
4324

    
4325
  @staticmethod
4326
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary
4328

4329
    @param rlist: a map with node names as keys and OS objects as values
4330

4331
    @rtype: dict
4332
    @return: a dictionary with osnames as keys and as value another
4333
        map, with nodes as keys and tuples of (path, status, diagnose,
4334
        variants, parameters, api_versions) as values, eg::
4335

4336
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4337
                                     (/srv/..., False, "invalid api")],
4338
                           "node2": [(/srv/..., True, "", [], [])]}
4339
          }
4340

4341
    """
4342
    all_os = {}
4343
    # we build here the list of nodes that didn't fail the RPC (at RPC
4344
    # level), so that nodes with a non-responding node daemon don't
4345
    # make all OSes invalid
4346
    good_nodes = [node_name for node_name in rlist
4347
                  if not rlist[node_name].fail_msg]
4348
    for node_name, nr in rlist.items():
4349
      if nr.fail_msg or not nr.payload:
4350
        continue
4351
      for (name, path, status, diagnose, variants,
4352
           params, api_versions) in nr.payload:
4353
        if name not in all_os:
4354
          # build a list of nodes for this os containing empty lists
4355
          # for each node in node_list
4356
          all_os[name] = {}
4357
          for nname in good_nodes:
4358
            all_os[name][nname] = []
4359
        # convert params from [name, help] to (name, help)
4360
        params = [tuple(v) for v in params]
4361
        all_os[name][node_name].append((path, status, diagnose,
4362
                                        variants, params, api_versions))
4363
    return all_os
4364

    
4365
  def _GetQueryData(self, lu):
4366
    """Computes the list of nodes and their attributes.
4367

4368
    """
4369
    # Locking is not used
4370
    assert not (compat.any(lu.glm.is_owned(level)
4371
                           for level in locking.LEVELS
4372
                           if level != locking.LEVEL_CLUSTER) or
4373
                self.do_locking or self.use_locking)
4374

    
4375
    valid_nodes = [node.name
4376
                   for node in lu.cfg.GetAllNodesInfo().values()
4377
                   if not node.offline and node.vm_capable]
4378
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
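    # Illustrative shape of "pol" (see the _DiagnoseByOS docstring above);
    # the OS and node names are placeholders and the inner tuples are
    # (path, status, diagnose, variants, parameters, api_versions):
    #
    #   {"debian-etch": {"node1": [("/srv/...", True, "", [], [], [])]}}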
4379
    cluster = lu.cfg.GetClusterInfo()
4380

    
4381
    data = {}
4382

    
4383
    for (os_name, os_data) in pol.items():
4384
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4385
                          hidden=(os_name in cluster.hidden_os),
4386
                          blacklisted=(os_name in cluster.blacklisted_os))
4387

    
4388
      variants = set()
4389
      parameters = set()
4390
      api_versions = set()
4391

    
4392
      for idx, osl in enumerate(os_data.values()):
4393
        info.valid = bool(info.valid and osl and osl[0][1])
4394
        if not info.valid:
4395
          break
4396

    
4397
        (node_variants, node_params, node_api) = osl[0][3:6]
4398
        if idx == 0:
4399
          # First entry
4400
          variants.update(node_variants)
4401
          parameters.update(node_params)
4402
          api_versions.update(node_api)
4403
        else:
4404
          # Filter out inconsistent values
4405
          variants.intersection_update(node_variants)
4406
          parameters.intersection_update(node_params)
4407
          api_versions.intersection_update(node_api)
4408

    
4409
      info.variants = list(variants)
4410
      info.parameters = list(parameters)
4411
      info.api_versions = list(api_versions)
4412

    
4413
      data[os_name] = info
4414

    
4415
    # Prepare data in requested order
4416
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4417
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

    
4426
  @staticmethod
4427
  def _BuildFilter(fields, names):
4428
    """Builds a filter for querying OSes.
4429

4430
    """
4431
    name_filter = qlang.MakeSimpleFilter("name", names)
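    # Rough illustration (an assumption about qlang.MakeSimpleFilter, not
    # taken from this module): for names=["dummy-os"] the name filter is
    # expected to look like [qlang.OP_OR, [qlang.OP_EQUAL, "name", "dummy-os"]]
    # and to be None when no names were requested.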
4432

    
4433
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4434
    # respective field is not requested
4435
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4436
                     for fname in ["hidden", "blacklisted"]
4437
                     if fname not in fields]
4438
    if "valid" not in fields:
4439
      status_filter.append([qlang.OP_TRUE, "valid"])
4440

    
4441
    if status_filter:
4442
      status_filter.insert(0, qlang.OP_AND)
4443
    else:
4444
      status_filter = None
4445

    
4446
    if name_filter and status_filter:
4447
      return [qlang.OP_AND, name_filter, status_filter]
4448
    elif name_filter:
4449
      return name_filter
4450
    else:
4451
      return status_filter
4452

    
4453
  def CheckArguments(self):
4454
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4455
                       self.op.output_fields, False)
4456

    
4457
  def ExpandNames(self):
4458
    self.oq.ExpandNames(self)
4459

    
4460
  def Exec(self, feedback_fn):
4461
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
4472
    """Build hooks env.
4473

4474
    This doesn't run on the target node in the pre phase as a failed
4475
    node would then be impossible to remove.
4476

4477
    """
4478
    return {
4479
      "OP_TARGET": self.op.node_name,
4480
      "NODE_NAME": self.op.node_name,
4481
      }
4482

    
4483
  def BuildHooksNodes(self):
4484
    """Build hooks nodes.
4485

4486
    """
4487
    all_nodes = self.cfg.GetNodeList()
4488
    try:
4489
      all_nodes.remove(self.op.node_name)
4490
    except ValueError:
4491
      logging.warning("Node '%s', which is about to be removed, was not found"
4492
                      " in the list of all nodes", self.op.node_name)
4493
    return (all_nodes, all_nodes)
4494

    
4495
  def CheckPrereq(self):
4496
    """Check prerequisites.
4497

4498
    This checks:
4499
     - the node exists in the configuration
4500
     - it does not have primary or secondary instances
4501
     - it's not the master
4502

4503
    Any errors are signaled by raising errors.OpPrereqError.
4504

4505
    """
4506
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4507
    node = self.cfg.GetNodeInfo(self.op.node_name)
4508
    assert node is not None
4509

    
4510
    masternode = self.cfg.GetMasterNode()
4511
    if node.name == masternode:
4512
      raise errors.OpPrereqError("Node is the master node, failover to another"
4513
                                 " node is required", errors.ECODE_INVAL)
4514

    
4515
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4516
      if node.name in instance.all_nodes:
4517
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4518
                                   " please remove first" % instance_name,
4519
                                   errors.ECODE_INVAL)
4520
    self.op.node_name = node.name
4521
    self.node = node
4522

    
4523
  def Exec(self, feedback_fn):
4524
    """Removes the node from the cluster.
4525

4526
    """
4527
    node = self.node
4528
    logging.info("Stopping the node daemon and removing configs from node %s",
4529
                 node.name)
4530

    
4531
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4532

    
4533
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
4534
      "Not owning BGL"
4535

    
4536
    # Promote nodes to master candidate as needed
4537
    _AdjustCandidatePool(self, exceptions=[node.name])
4538
    self.context.RemoveNode(node.name)
4539

    
4540
    # Run post hooks on the node before it's removed
4541
    _RunPostHook(self, node.name)
4542

    
4543
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4544
    msg = result.fail_msg
4545
    if msg:
4546
      self.LogWarning("Errors encountered on the remote node while leaving"
4547
                      " the cluster: %s", msg)
4548

    
4549
    # Remove node from our /etc/hosts
4550
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4551
      master_node = self.cfg.GetMasterNode()
4552
      result = self.rpc.call_etc_hosts_modify(master_node,
4553
                                              constants.ETC_HOSTS_REMOVE,
4554
                                              node.name, None)
4555
      result.Raise("Can't update hosts file with new host data")
4556
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
4563
    lu.needed_locks = {}
4564
    lu.share_locks = _ShareAll()
4565

    
4566
    if self.names:
4567
      self.wanted = _GetWantedNodes(lu, self.names)
4568
    else:
4569
      self.wanted = locking.ALL_SET
4570

    
4571
    self.do_locking = (self.use_locking and
4572
                       query.NQ_LIVE in self.requested_data)
4573

    
4574
    if self.do_locking:
4575
      # If any non-static field is requested we need to lock the nodes
4576
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4577

    
4578
  def DeclareLocks(self, lu, level):
4579
    pass
4580

    
4581
  def _GetQueryData(self, lu):
4582
    """Computes the list of nodes and their attributes.
4583

4584
    """
4585
    all_info = lu.cfg.GetAllNodesInfo()
4586

    
4587
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4588

    
4589
    # Gather data as requested
4590
    if query.NQ_LIVE in self.requested_data:
4591
      # filter out non-vm_capable nodes
4592
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4593

    
4594
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4595
                                        lu.cfg.GetHypervisorType())
4596
      live_data = dict((name, nresult.payload)
4597
                       for (name, nresult) in node_data.items()
4598
                       if not nresult.fail_msg and nresult.payload)
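      # Illustrative shape (the host name is a placeholder): live_data maps
      # the names of reachable nodes to the raw node_info payload, e.g.
      #   {"node1.example.com": {...}}
      # nodes whose RPC failed or returned nothing are simply left out.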
4599
    else:
4600
      live_data = None
4601

    
4602
    if query.NQ_INST in self.requested_data:
4603
      node_to_primary = dict([(name, set()) for name in nodenames])
4604
      node_to_secondary = dict([(name, set()) for name in nodenames])
4605

    
4606
      inst_data = lu.cfg.GetAllInstancesInfo()
4607

    
4608
      for inst in inst_data.values():
4609
        if inst.primary_node in node_to_primary:
4610
          node_to_primary[inst.primary_node].add(inst.name)
4611
        for secnode in inst.secondary_nodes:
4612
          if secnode in node_to_secondary:
4613
            node_to_secondary[secnode].add(inst.name)
4614
    else:
4615
      node_to_primary = None
4616
      node_to_secondary = None
4617

    
4618
    if query.NQ_OOB in self.requested_data:
4619
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4620
                         for name, node in all_info.iteritems())
4621
    else:
4622
      oob_support = None
4623

    
4624
    if query.NQ_GROUP in self.requested_data:
4625
      groups = lu.cfg.GetAllNodeGroupsInfo()
4626
    else:
4627
      groups = {}
4628

    
4629
    return query.NodeQueryData([all_info[name] for name in nodenames],
4630
                               live_data, lu.cfg.GetMasterNode(),
4631
                               node_to_primary, node_to_secondary, groups,
4632
                               oob_support, lu.cfg.GetClusterInfo())
4633

    
4634

    
4635
class LUNodeQuery(NoHooksLU):
4636
  """Logical unit for querying nodes.
4637

4638
  """
4639
  # pylint: disable=W0142
4640
  REQ_BGL = False
4641

    
4642
  def CheckArguments(self):
4643
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4644
                         self.op.output_fields, self.op.use_locking)
4645

    
4646
  def ExpandNames(self):
4647
    self.nq.ExpandNames(self)
4648

    
4649
  def DeclareLocks(self, level):
4650
    self.nq.DeclareLocks(self, level)
4651

    
4652
  def Exec(self, feedback_fn):
4653
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

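  # Illustrative example (values are placeholders, not from the original
  # module): with output_fields=["node", "name", "size"], Exec() below
  # returns rows of strings such as
  #   [["node1.example.com", "instance1-disk0", "10240"], ...]
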
  def CheckArguments(self):
4665
    _CheckOutputFields(static=self._FIELDS_STATIC,
4666
                       dynamic=self._FIELDS_DYNAMIC,
4667
                       selected=self.op.output_fields)
4668

    
4669
  def ExpandNames(self):
4670
    self.share_locks = _ShareAll()
4671
    self.needed_locks = {}
4672

    
4673
    if not self.op.nodes:
4674
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4675
    else:
4676
      self.needed_locks[locking.LEVEL_NODE] = \
4677
        _GetWantedNodes(self, self.op.nodes)
4678

    
4679
  def Exec(self, feedback_fn):
4680
    """Computes the list of nodes and their attributes.
4681

4682
    """
4683
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4684
    volumes = self.rpc.call_node_volumes(nodenames)
4685

    
4686
    ilist = self.cfg.GetAllInstancesInfo()
4687
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4688

    
4689
    output = []
4690
    for node in nodenames:
4691
      nresult = volumes[node]
4692
      if nresult.offline:
4693
        continue
4694
      msg = nresult.fail_msg
4695
      if msg:
4696
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4697
        continue
4698

    
4699
      node_vols = sorted(nresult.payload,
4700
                         key=operator.itemgetter("dev"))
4701

    
4702
      for vol in node_vols:
4703
        node_output = []
4704
        for field in self.op.output_fields:
4705
          if field == "node":
4706
            val = node
4707
          elif field == "phys":
4708
            val = vol["dev"]
4709
          elif field == "vg":
4710
            val = vol["vg"]
4711
          elif field == "name":
4712
            val = vol["name"]
4713
          elif field == "size":
4714
            val = int(float(vol["size"]))
4715
          elif field == "instance":
4716
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4717
          else:
4718
            raise errors.ParameterError(field)
4719
          node_output.append(str(val))
4720

    
4721
        output.append(node_output)
4722

    
4723
    return output
4724

    
4725

    
4726
class LUNodeQueryStorage(NoHooksLU):
4727
  """Logical unit for getting information on storage units on node(s).
4728

4729
  """
4730
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4731
  REQ_BGL = False
4732

    
4733
  def CheckArguments(self):
4734
    _CheckOutputFields(static=self._FIELDS_STATIC,
4735
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4736
                       selected=self.op.output_fields)
4737

    
4738
  def ExpandNames(self):
4739
    self.share_locks = _ShareAll()
4740
    self.needed_locks = {}
4741

    
4742
    if self.op.nodes:
4743
      self.needed_locks[locking.LEVEL_NODE] = \
4744
        _GetWantedNodes(self, self.op.nodes)
4745
    else:
4746
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4747

    
4748
  def Exec(self, feedback_fn):
4749
    """Computes the list of nodes and their attributes.
4750

4751
    """
4752
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4753

    
4754
    # Always get name to sort by
4755
    if constants.SF_NAME in self.op.output_fields:
4756
      fields = self.op.output_fields[:]
4757
    else:
4758
      fields = [constants.SF_NAME] + self.op.output_fields
4759

    
4760
    # Never ask for node or type as it's only known to the LU
4761
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4762
      while extra in fields:
4763
        fields.remove(extra)
4764

    
4765
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4766
    name_idx = field_idx[constants.SF_NAME]
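    # Worked example (illustrative): for output_fields=[constants.SF_NODE,
    # constants.SF_FREE], "fields" becomes [constants.SF_NAME,
    # constants.SF_FREE] (node/type are stripped, the name is prepended), so
    # field_idx == {constants.SF_NAME: 0, constants.SF_FREE: 1} and
    # name_idx == 0.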
4767

    
4768
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4769
    data = self.rpc.call_storage_list(self.nodes,
4770
                                      self.op.storage_type, st_args,
4771
                                      self.op.name, fields)
4772

    
4773
    result = []
4774

    
4775
    for node in utils.NiceSort(self.nodes):
4776
      nresult = data[node]
4777
      if nresult.offline:
4778
        continue
4779

    
4780
      msg = nresult.fail_msg
4781
      if msg:
4782
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4783
        continue
4784

    
4785
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4786

    
4787
      for name in utils.NiceSort(rows.keys()):
4788
        row = rows[name]
4789

    
4790
        out = []
4791

    
4792
        for field in self.op.output_fields:
4793
          if field == constants.SF_NODE:
4794
            val = node
4795
          elif field == constants.SF_TYPE:
4796
            val = self.op.storage_type
4797
          elif field in field_idx:
4798
            val = row[field_idx[field]]
4799
          else:
4800
            raise errors.ParameterError(field)
4801

    
4802
          out.append(val)
4803

    
4804
        result.append(out)
4805

    
4806
    return result
4807

    
4808

    
4809
class _InstanceQuery(_QueryBase):
4810
  FIELDS = query.INSTANCE_FIELDS
4811

    
4812
  def ExpandNames(self, lu):
4813
    lu.needed_locks = {}
4814
    lu.share_locks = _ShareAll()
4815

    
4816
    if self.names:
4817
      self.wanted = _GetWantedInstances(lu, self.names)
4818
    else:
4819
      self.wanted = locking.ALL_SET
4820

    
4821
    self.do_locking = (self.use_locking and
4822
                       query.IQ_LIVE in self.requested_data)
4823
    if self.do_locking:
4824
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4825
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4826
      lu.needed_locks[locking.LEVEL_NODE] = []
4827
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4828

    
4829
    self.do_grouplocks = (self.do_locking and
4830
                          query.IQ_NODES in self.requested_data)
4831

    
4832
  def DeclareLocks(self, lu, level):
4833
    if self.do_locking:
4834
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4835
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4836

    
4837
        # Lock all groups used by instances optimistically; this requires going
4838
        # via the node before it's locked, requiring verification later on
4839
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4840
          set(group_uuid
4841
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4842
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4843
      elif level == locking.LEVEL_NODE:
4844
        lu._LockInstancesNodes() # pylint: disable=W0212
4845

    
4846
  @staticmethod
4847
  def _CheckGroupLocks(lu):
4848
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4849
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4850

    
4851
    # Check if node groups for locked instances are still correct
4852
    for instance_name in owned_instances:
4853
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4854

    
4855
  def _GetQueryData(self, lu):
4856
    """Computes the list of instances and their attributes.
4857

4858
    """
4859
    if self.do_grouplocks:
4860
      self._CheckGroupLocks(lu)
4861

    
4862
    cluster = lu.cfg.GetClusterInfo()
4863
    all_info = lu.cfg.GetAllInstancesInfo()
4864

    
4865
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4866

    
4867
    instance_list = [all_info[name] for name in instance_names]
4868
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4869
                                        for inst in instance_list)))
4870
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4871
    bad_nodes = []
4872
    offline_nodes = []
4873
    wrongnode_inst = set()
4874

    
4875
    # Gather data as requested
4876
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4877
      live_data = {}
4878
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4879
      for name in nodes:
4880
        result = node_data[name]
4881
        if result.offline:
4882
          # offline nodes will be in both lists
4883
          assert result.fail_msg
4884
          offline_nodes.append(name)
4885
        if result.fail_msg:
4886
          bad_nodes.append(name)
4887
        elif result.payload:
4888
          for inst in result.payload:
4889
            if inst in all_info:
4890
              if all_info[inst].primary_node == name:
4891
                live_data.update(result.payload)
4892
              else:
4893
                wrongnode_inst.add(inst)
4894
            else:
4895
              # orphan instance; we don't list it here as we don't
4896
              # handle this case yet in the output of instance listing
4897
              logging.warning("Orphan instance '%s' found on node %s",
4898
                              inst, name)
4899
        # else no instance is alive
4900
    else:
4901
      live_data = {}
4902

    
4903
    if query.IQ_DISKUSAGE in self.requested_data:
4904
      disk_usage = dict((inst.name,
4905
                         _ComputeDiskSize(inst.disk_template,
4906
                                          [{constants.IDISK_SIZE: disk.size}
4907
                                           for disk in inst.disks]))
4908
                        for inst in instance_list)
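      # Illustrative result (numbers are placeholders): disk_usage maps
      # instance names to the total disk size computed by _ComputeDiskSize,
      # e.g. {"instance1.example.com": 10368}.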
4909
    else:
4910
      disk_usage = None
4911

    
4912
    if query.IQ_CONSOLE in self.requested_data:
4913
      consinfo = {}
4914
      for inst in instance_list:
4915
        if inst.name in live_data:
4916
          # Instance is running
4917
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4918
        else:
4919
          consinfo[inst.name] = None
4920
      assert set(consinfo.keys()) == set(instance_names)
4921
    else:
4922
      consinfo = None
4923

    
4924
    if query.IQ_NODES in self.requested_data:
4925
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4926
                                            instance_list)))
4927
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4928
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4929
                    for uuid in set(map(operator.attrgetter("group"),
4930
                                        nodes.values())))
4931
    else:
4932
      nodes = None
4933
      groups = None
4934

    
4935
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4936
                                   disk_usage, offline_nodes, bad_nodes,
4937
                                   live_data, wrongnode_inst, consinfo,
4938
                                   nodes, groups)
4939

    
4940

    
4941
class LUQuery(NoHooksLU):
4942
  """Query for resources/items of a certain kind.
4943

4944
  """
4945
  # pylint: disable=W0142
4946
  REQ_BGL = False
4947

    
4948
  def CheckArguments(self):
4949
    qcls = _GetQueryImplementation(self.op.what)
4950

    
4951
    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)
4952

    
4953
  def ExpandNames(self):
4954
    self.impl.ExpandNames(self)
4955

    
4956
  def DeclareLocks(self, level):
4957
    self.impl.DeclareLocks(self, level)
4958

    
4959
  def Exec(self, feedback_fn):
4960
    return self.impl.NewStyleQuery(self)
4961

    
4962

    
4963
class LUQueryFields(NoHooksLU):
4964
  """Query for resources/items of a certain kind.
4965

4966
  """
4967
  # pylint: disable=W0142
4968
  REQ_BGL = False
4969

    
4970
  def CheckArguments(self):
4971
    self.qcls = _GetQueryImplementation(self.op.what)
4972

    
4973
  def ExpandNames(self):
4974
    self.needed_locks = {}
4975

    
4976
  def Exec(self, feedback_fn):
4977
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4978

    
4979

    
4980
class LUNodeModifyStorage(NoHooksLU):
4981
  """Logical unit for modifying a storage volume on a node.
4982

4983
  """
4984
  REQ_BGL = False
4985

    
4986
  def CheckArguments(self):
4987
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4988

    
4989
    storage_type = self.op.storage_type
4990

    
4991
    try:
4992
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4993
    except KeyError:
4994
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4995
                                 " modified" % storage_type,
4996
                                 errors.ECODE_INVAL)
4997

    
4998
    diff = set(self.op.changes.keys()) - modifiable
4999
    if diff:
5000
      raise errors.OpPrereqError("The following fields can not be modified for"
5001
                                 " storage units of type '%s': %r" %
5002
                                 (storage_type, list(diff)),
5003
                                 errors.ECODE_INVAL)
5004

    
5005
  def ExpandNames(self):
5006
    self.needed_locks = {
5007
      locking.LEVEL_NODE: self.op.node_name,
5008
      }
5009

    
5010
  def Exec(self, feedback_fn):
5011
    """Computes the list of nodes and their attributes.
5012

5013
    """
5014
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
5015
    result = self.rpc.call_storage_modify(self.op.node_name,
5016
                                          self.op.storage_type, st_args,
5017
                                          self.op.name, self.op.changes)
5018
    result.Raise("Failed to modify storage unit '%s' on %s" %
5019
                 (self.op.name, self.op.node_name))


class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
5031
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
5032
    # validate/normalize the node name
5033
    self.hostname = netutils.GetHostname(name=self.op.node_name,
5034
                                         family=self.primary_ip_family)
5035
    self.op.node_name = self.hostname.name
5036

    
5037
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
5038
      raise errors.OpPrereqError("Cannot readd the master node",
5039
                                 errors.ECODE_STATE)
5040

    
5041
    if self.op.readd and self.op.group:
5042
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
5043
                                 " being readded", errors.ECODE_INVAL)
5044

    
5045
  def BuildHooksEnv(self):
5046
    """Build hooks env.
5047

5048
    This will run on all nodes before, and on all nodes + the new node after.
5049

5050
    """
5051
    return {
5052
      "OP_TARGET": self.op.node_name,
5053
      "NODE_NAME": self.op.node_name,
5054
      "NODE_PIP": self.op.primary_ip,
5055
      "NODE_SIP": self.op.secondary_ip,
5056
      "MASTER_CAPABLE": str(self.op.master_capable),
5057
      "VM_CAPABLE": str(self.op.vm_capable),
5058
      }
5059

    
5060
  def BuildHooksNodes(self):
5061
    """Build hooks nodes.
5062

5063
    """
5064
    # Exclude added node
5065
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
5066
    post_nodes = pre_nodes + [self.op.node_name, ]
5067

    
5068
    return (pre_nodes, post_nodes)
5069

    
5070
  def CheckPrereq(self):
5071
    """Check prerequisites.
5072

5073
    This checks:
5074
     - the new node is not already in the config
5075
     - it is resolvable
5076
     - its parameters (single/dual homed) matches the cluster
5077

5078
    Any errors are signaled by raising errors.OpPrereqError.
5079

5080
    """
5081
    cfg = self.cfg
5082
    hostname = self.hostname
5083
    node = hostname.name
5084
    primary_ip = self.op.primary_ip = hostname.ip
5085
    if self.op.secondary_ip is None:
5086
      if self.primary_ip_family == netutils.IP6Address.family:
5087
        raise errors.OpPrereqError("When using an IPv6 primary address, a"
                                   " valid IPv4 address must be given as"
                                   " secondary", errors.ECODE_INVAL)
5090
      self.op.secondary_ip = primary_ip
5091

    
5092
    secondary_ip = self.op.secondary_ip
5093
    if not netutils.IP4Address.IsValid(secondary_ip):
5094
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5095
                                 " address" % secondary_ip, errors.ECODE_INVAL)
5096

    
5097
    node_list = cfg.GetNodeList()
5098
    if not self.op.readd and node in node_list:
5099
      raise errors.OpPrereqError("Node %s is already in the configuration" %
5100
                                 node, errors.ECODE_EXISTS)
5101
    elif self.op.readd and node not in node_list:
5102
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
5103
                                 errors.ECODE_NOENT)
5104

    
5105
    self.changed_primary_ip = False
5106

    
5107
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
5108
      if self.op.readd and node == existing_node_name:
5109
        if existing_node.secondary_ip != secondary_ip:
5110
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
5111
                                     " address configuration as before",
5112
                                     errors.ECODE_INVAL)
5113
        if existing_node.primary_ip != primary_ip:
5114
          self.changed_primary_ip = True
5115

    
5116
        continue
5117

    
5118
      if (existing_node.primary_ip == primary_ip or
5119
          existing_node.secondary_ip == primary_ip or
5120
          existing_node.primary_ip == secondary_ip or
5121
          existing_node.secondary_ip == secondary_ip):
5122
        raise errors.OpPrereqError("New node ip address(es) conflict with"
5123
                                   " existing node %s" % existing_node.name,
5124
                                   errors.ECODE_NOTUNIQUE)
5125

    
5126
    # After this 'if' block, None is no longer a valid value for the
5127
    # _capable op attributes
5128
    if self.op.readd:
5129
      old_node = self.cfg.GetNodeInfo(node)
5130
      assert old_node is not None, "Can't retrieve locked node %s" % node
5131
      for attr in self._NFLAGS:
5132
        if getattr(self.op, attr) is None:
5133
          setattr(self.op, attr, getattr(old_node, attr))
5134
    else:
5135
      for attr in self._NFLAGS:
5136
        if getattr(self.op, attr) is None:
5137
          setattr(self.op, attr, True)
5138

    
5139
    if self.op.readd and not self.op.vm_capable:
5140
      pri, sec = cfg.GetNodeInstances(node)
5141
      if pri or sec:
5142
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
5143
                                   " flag set to false, but it already holds"
5144
                                   " instances" % node,
5145
                                   errors.ECODE_STATE)
5146

    
5147
    # check that the type of the node (single versus dual homed) is the
5148
    # same as for the master
5149
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
5150
    master_singlehomed = myself.secondary_ip == myself.primary_ip
5151
    newbie_singlehomed = secondary_ip == primary_ip
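    # Example (illustrative addresses): a node whose primary and secondary
    # IPs are both 192.0.2.10 is single homed; one with primary 192.0.2.10
    # and a separate secondary 198.51.100.10 is dual homed.  The new node
    # must use the same layout as the master, which is checked below.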
5152
    if master_singlehomed != newbie_singlehomed:
5153
      if master_singlehomed:
5154
        raise errors.OpPrereqError("The master has no secondary ip but the"
5155
                                   " new node has one",
5156
                                   errors.ECODE_INVAL)
5157
      else:
5158
        raise errors.OpPrereqError("The master has a secondary ip but the"
5159
                                   " new node doesn't have one",
5160
                                   errors.ECODE_INVAL)
5161

    
5162
    # checks reachability
5163
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
5164
      raise errors.OpPrereqError("Node not reachable by ping",
5165
                                 errors.ECODE_ENVIRON)
5166

    
5167
    if not newbie_singlehomed:
5168
      # check reachability from my secondary ip to newbie's secondary ip
5169
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
5170
                           source=myself.secondary_ip):
5171
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5172
                                   " based ping to node daemon port",
5173
                                   errors.ECODE_ENVIRON)
5174

    
5175
    if self.op.readd:
5176
      exceptions = [node]
5177
    else:
5178
      exceptions = []
5179

    
5180
    if self.op.master_capable:
5181
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
5182
    else:
5183
      self.master_candidate = False
5184

    
5185
    if self.op.readd:
5186
      self.new_node = old_node
5187
    else:
5188
      node_group = cfg.LookupNodeGroup(self.op.group)
5189
      self.new_node = objects.Node(name=node,
5190
                                   primary_ip=primary_ip,
5191
                                   secondary_ip=secondary_ip,
5192
                                   master_candidate=self.master_candidate,
5193
                                   offline=False, drained=False,
5194
                                   group=node_group)
5195

    
5196
    if self.op.ndparams:
5197
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
5198

    
5199
  def Exec(self, feedback_fn):
5200
    """Adds the new node to the cluster.
5201

5202
    """
5203
    new_node = self.new_node
5204
    node = new_node.name
5205

    
5206
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5207
      "Not owning BGL"
5208

    
5209
    # We are adding a new node, so we assume it's powered
5210
    new_node.powered = True
5211

    
5212
    # for re-adds, reset the offline/drained/master-candidate flags;
5213
    # we need to reset here, otherwise offline would prevent RPC calls
5214
    # later in the procedure; this also means that if the re-add
5215
    # fails, we are left with a non-offlined, broken node
5216
    if self.op.readd:
5217
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5218
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5219
      # if we demote the node, we do cleanup later in the procedure
5220
      new_node.master_candidate = self.master_candidate
5221
      if self.changed_primary_ip:
5222
        new_node.primary_ip = self.op.primary_ip
5223

    
5224
    # copy the master/vm_capable flags
5225
    for attr in self._NFLAGS:
5226
      setattr(new_node, attr, getattr(self.op, attr))
5227

    
5228
    # notify the user about any possible mc promotion
5229
    if new_node.master_candidate:
5230
      self.LogInfo("Node will be a master candidate")
5231

    
5232
    if self.op.ndparams:
5233
      new_node.ndparams = self.op.ndparams
5234
    else:
5235
      new_node.ndparams = {}
5236

    
5237
    # check connectivity
5238
    result = self.rpc.call_version([node])[node]
5239
    result.Raise("Can't get version information from node %s" % node)
5240
    if constants.PROTOCOL_VERSION == result.payload:
5241
      logging.info("Communication to node %s fine, sw version %s match",
5242
                   node, result.payload)
5243
    else:
5244
      raise errors.OpExecError("Version mismatch master version %s,"
5245
                               " node version %s" %
5246
                               (constants.PROTOCOL_VERSION, result.payload))
5247

    
5248
    # Add node to our /etc/hosts, and add key to known_hosts
5249
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5250
      master_node = self.cfg.GetMasterNode()
5251
      result = self.rpc.call_etc_hosts_modify(master_node,
5252
                                              constants.ETC_HOSTS_ADD,
5253
                                              self.hostname.name,
5254
                                              self.hostname.ip)
5255
      result.Raise("Can't update hosts file with new host data")
5256

    
5257
    if new_node.secondary_ip != new_node.primary_ip:
5258
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5259
                               False)
5260

    
5261
    node_verify_list = [self.cfg.GetMasterNode()]
5262
    node_verify_param = {
5263
      constants.NV_NODELIST: ([node], {}),
5264
      # TODO: do a node-net-test as well?
5265
    }
5266

    
5267
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5268
                                       self.cfg.GetClusterName())
5269
    for verifier in node_verify_list:
5270
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5271
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5272
      if nl_payload:
5273
        for failed in nl_payload:
5274
          feedback_fn("ssh/hostname verification failed"
5275
                      " (checking from %s): %s" %
5276
                      (verifier, nl_payload[failed]))
5277
        raise errors.OpExecError("ssh/hostname verification failed")
5278

    
5279
    if self.op.readd:
5280
      _RedistributeAncillaryFiles(self)
5281
      self.context.ReaddNode(new_node)
5282
      # make sure we redistribute the config
5283
      self.cfg.Update(new_node, feedback_fn)
5284
      # and make sure the new node will not have old files around
5285
      if not new_node.master_candidate:
5286
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5287
        msg = result.fail_msg
5288
        if msg:
5289
          self.LogWarning("Node failed to demote itself from master"
5290
                          " candidate status: %s" % msg)
5291
    else:
5292
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5293
                                  additional_vm=self.op.vm_capable)
5294
      self.context.AddNode(new_node, self.proc.GetECId())
5295

    
5296

    
5297
class LUNodeSetParams(LogicalUnit):
5298
  """Modifies the parameters of a node.
5299

5300
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5301
      to the node role (as _ROLE_*)
5302
  @cvar _R2F: a dictionary from node role to tuples of flags
5303
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5304

5305
  """
5306
  HPATH = "node-modify"
5307
  HTYPE = constants.HTYPE_NODE
5308
  REQ_BGL = False
5309
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5310
  _F2R = {
5311
    (True, False, False): _ROLE_CANDIDATE,
5312
    (False, True, False): _ROLE_DRAINED,
5313
    (False, False, True): _ROLE_OFFLINE,
5314
    (False, False, False): _ROLE_REGULAR,
5315
    }
5316
  _R2F = dict((v, k) for k, v in _F2R.items())
5317
  _FLAGS = ["master_candidate", "drained", "offline"]
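  # Worked example: a node whose flags are (master_candidate=True,
  # drained=False, offline=False) maps via _F2R to _ROLE_CANDIDATE, and
  # _R2F[_ROLE_CANDIDATE] gives back (True, False, False).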
5318

    
5319
  def CheckArguments(self):
5320
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5321
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5322
                self.op.master_capable, self.op.vm_capable,
5323
                self.op.secondary_ip, self.op.ndparams]
5324
    if all_mods.count(None) == len(all_mods):
5325
      raise errors.OpPrereqError("Please pass at least one modification",
5326
                                 errors.ECODE_INVAL)
5327
    if all_mods.count(True) > 1:
5328
      raise errors.OpPrereqError("Can't set the node into more than one"
5329
                                 " state at the same time",
5330
                                 errors.ECODE_INVAL)
5331

    
5332
    # Boolean value that tells us whether we might be demoting from MC
5333
    self.might_demote = (self.op.master_candidate == False or
5334
                         self.op.offline == True or
5335
                         self.op.drained == True or
5336
                         self.op.master_capable == False)
5337

    
5338
    if self.op.secondary_ip:
5339
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5340
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5341
                                   " address" % self.op.secondary_ip,
5342
                                   errors.ECODE_INVAL)
5343

    
5344
    self.lock_all = self.op.auto_promote and self.might_demote
5345
    self.lock_instances = self.op.secondary_ip is not None
5346

    
5347
  def _InstanceFilter(self, instance):
5348
    """Filter for getting affected instances.
5349

5350
    """
5351
    return (instance.disk_template in constants.DTS_INT_MIRROR and
5352
            self.op.node_name in instance.all_nodes)
5353

    
5354
  def ExpandNames(self):
5355
    if self.lock_all:
5356
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5357
    else:
5358
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5359

    
5360
    # Since modifying a node can have severe effects on currently running
5361
    # operations the resource lock is at least acquired in shared mode
5362
    self.needed_locks[locking.LEVEL_NODE_RES] = \
5363
      self.needed_locks[locking.LEVEL_NODE]
5364

    
5365
    # Get node resource and instance locks in shared mode; they are not used
5366
    # for anything but read-only access
5367
    self.share_locks[locking.LEVEL_NODE_RES] = 1
5368
    self.share_locks[locking.LEVEL_INSTANCE] = 1
5369

    
5370
    if self.lock_instances:
5371
      self.needed_locks[locking.LEVEL_INSTANCE] = \
5372
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))
5373

    
5374
  def BuildHooksEnv(self):
5375
    """Build hooks env.
5376

5377
    This runs on the master node.
5378

5379
    """
5380
    return {
5381
      "OP_TARGET": self.op.node_name,
5382
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5383
      "OFFLINE": str(self.op.offline),
5384
      "DRAINED": str(self.op.drained),
5385
      "MASTER_CAPABLE": str(self.op.master_capable),
5386
      "VM_CAPABLE": str(self.op.vm_capable),
5387
      }
5388

    
5389
  def BuildHooksNodes(self):
5390
    """Build hooks nodes.
5391

5392
    """
5393
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5394
    return (nl, nl)
5395

    
5396
  def CheckPrereq(self):
5397
    """Check prerequisites.
5398

5399
    This only checks the instance list against the existing names.
5400

5401
    """
5402
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5403

    
5404
    if self.lock_instances:
5405
      affected_instances = \
5406
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)
5407

    
5408
      # Verify instance locks
5409
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
5410
      wanted_instances = frozenset(affected_instances.keys())
5411
      if wanted_instances - owned_instances:
5412
        raise errors.OpPrereqError("Instances affected by changing node %s's"
5413
                                   " secondary IP address have changed since"
5414
                                   " locks were acquired, wanted '%s', have"
5415
                                   " '%s'; retry the operation" %
5416
                                   (self.op.node_name,
5417
                                    utils.CommaJoin(wanted_instances),
5418
                                    utils.CommaJoin(owned_instances)),
5419
                                   errors.ECODE_STATE)
5420
    else:
5421
      affected_instances = None
5422

    
5423
    if (self.op.master_candidate is not None or
5424
        self.op.drained is not None or
5425
        self.op.offline is not None):
5426
      # we can't change the master's node flags
5427
      if self.op.node_name == self.cfg.GetMasterNode():
5428
        raise errors.OpPrereqError("The master role can be changed"
5429
                                   " only via master-failover",
5430
                                   errors.ECODE_INVAL)
5431

    
5432
    if self.op.master_candidate and not node.master_capable:
5433
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5434
                                 " it a master candidate" % node.name,
5435
                                 errors.ECODE_STATE)
5436

    
5437
    if self.op.vm_capable == False:
5438
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5439
      if ipri or isec:
5440
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5441
                                   " the vm_capable flag" % node.name,
5442
                                   errors.ECODE_STATE)
5443

    
5444
    if node.master_candidate and self.might_demote and not self.lock_all:
5445
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5446
      # check if after removing the current node, we're missing master
5447
      # candidates
5448
      (mc_remaining, mc_should, _) = \
5449
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5450
      if mc_remaining < mc_should:
5451
        raise errors.OpPrereqError("Not enough master candidates, please"
5452
                                   " pass auto promote option to allow"
5453
                                   " promotion", errors.ECODE_STATE)
5454

    
5455
    self.old_flags = old_flags = (node.master_candidate,
5456
                                  node.drained, node.offline)
5457
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5458
    self.old_role = old_role = self._F2R[old_flags]
5459

    
5460
    # Check for ineffective changes
5461
    for attr in self._FLAGS:
5462
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5463
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5464
        setattr(self.op, attr, None)
5465

    
5466
    # Past this point, any flag change to False means a transition
5467
    # away from the respective state, as only real changes are kept
5468

    
5469
    # TODO: We might query the real power state if it supports OOB
5470
    if _SupportsOob(self.cfg, node):
5471
      if self.op.offline is False and not (node.powered or
5472
                                           self.op.powered == True):
5473
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5474
                                    " offline status can be reset") %
5475
                                   self.op.node_name)
5476
    elif self.op.powered is not None:
5477
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5478
                                  " as it does not support out-of-band"
5479
                                  " handling") % self.op.node_name)
5480

    
5481
    # If we're being deofflined/drained, we'll MC ourself if needed
5482
    if (self.op.drained == False or self.op.offline == False or
5483
        (self.op.master_capable and not node.master_capable)):
5484
      if _DecideSelfPromotion(self):
5485
        self.op.master_candidate = True
5486
        self.LogInfo("Auto-promoting node to master candidate")
5487

    
5488
    # If we're no longer master capable, we'll demote ourselves from MC
5489
    if self.op.master_capable == False and node.master_candidate:
5490
      self.LogInfo("Demoting from master candidate")
5491
      self.op.master_candidate = False
5492

    
5493
    # Compute new role
5494
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5495
    if self.op.master_candidate:
5496
      new_role = self._ROLE_CANDIDATE
5497
    elif self.op.drained:
5498
      new_role = self._ROLE_DRAINED
5499
    elif self.op.offline:
5500
      new_role = self._ROLE_OFFLINE
5501
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5502
      # False is still in new flags, which means we're un-setting (the
5503
      # only) True flag
5504
      new_role = self._ROLE_REGULAR
5505
    else: # no new flags, nothing, keep old role
5506
      new_role = old_role
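    # Worked example: requesting drained=True on a node that is currently a
    # master candidate yields _ROLE_DRAINED, while passing only offline=False
    # for a node that is already online keeps old_role, because that
    # ineffective flag was reset to None above.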
5507

    
5508
    self.new_role = new_role
5509

    
5510
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5511
      # Trying to transition out of offline status
5512
      # TODO: Use standard RPC runner, but make sure it works when the node is
5513
      # still marked offline
5514
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
5515
      if result.fail_msg:
5516
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5517
                                   " to report its version: %s" %
5518
                                   (node.name, result.fail_msg),
5519
                                   errors.ECODE_STATE)
5520
      else:
5521
        self.LogWarning("Transitioning node from offline to online state"
5522
                        " without using re-add. Please make sure the node"
5523
                        " is healthy!")
5524

    
5525
    if self.op.secondary_ip:
5526
      # Ok even without locking, because this can't be changed by any LU
5527
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5528
      master_singlehomed = master.secondary_ip == master.primary_ip
5529
      if master_singlehomed and self.op.secondary_ip:
5530
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5531
                                   " homed cluster", errors.ECODE_INVAL)
5532

    
5533
      assert not (frozenset(affected_instances) -
5534
                  self.owned_locks(locking.LEVEL_INSTANCE))
5535

    
5536
      if node.offline:
5537
        if affected_instances:
5538
          raise errors.OpPrereqError("Cannot change secondary IP address:"
5539
                                     " offline node has instances (%s)"
5540
                                     " configured to use it" %
5541
                                     utils.CommaJoin(affected_instances.keys()))
5542
      else:
5543
        # On online nodes, check that no instances are running, and that
5544
        # the node has the new ip and we can reach it.
5545
        for instance in affected_instances.values():
5546
          _CheckInstanceState(self, instance, INSTANCE_DOWN,
5547
                              msg="cannot change secondary ip")
5548

    
5549
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5550
        if master.name != node.name:
5551
          # check reachability from master secondary ip to new secondary ip
5552
          if not netutils.TcpPing(self.op.secondary_ip,
5553
                                  constants.DEFAULT_NODED_PORT,
5554
                                  source=master.secondary_ip):
5555
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5556
                                       " based ping to node daemon port",
5557
                                       errors.ECODE_ENVIRON)
5558

    
5559
    if self.op.ndparams:
5560
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5561
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5562
      self.new_ndparams = new_ndparams
5563

    
5564
  def Exec(self, feedback_fn):
5565
    """Modifies a node.
5566

5567
    """
5568
    node = self.node
5569
    old_role = self.old_role
5570
    new_role = self.new_role
5571

    
5572
    result = []
5573

    
5574
    if self.op.ndparams:
5575
      node.ndparams = self.new_ndparams
5576

    
5577
    if self.op.powered is not None:
5578
      node.powered = self.op.powered
5579

    
5580
    for attr in ["master_capable", "vm_capable"]:
5581
      val = getattr(self.op, attr)
5582
      if val is not None:
5583
        setattr(node, attr, val)
5584
        result.append((attr, str(val)))
5585

    
5586
    if new_role != old_role:
5587
      # Tell the node to demote itself, if no longer MC and not offline
5588
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5589
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5590
        if msg:
5591
          self.LogWarning("Node failed to demote itself: %s", msg)
5592

    
5593
      new_flags = self._R2F[new_role]
5594
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5595
        if of != nf:
5596
          result.append((desc, str(nf)))
5597
      (node.master_candidate, node.drained, node.offline) = new_flags
5598

    
5599
      # we locked all nodes, we adjust the CP before updating this node
5600
      if self.lock_all:
5601
        _AdjustCandidatePool(self, [node.name])
5602

    
5603
    if self.op.secondary_ip:
5604
      node.secondary_ip = self.op.secondary_ip
5605
      result.append(("secondary_ip", self.op.secondary_ip))
5606

    
5607
    # this will trigger configuration file update, if needed
5608
    self.cfg.Update(node, feedback_fn)
5609

    
5610
    # this will trigger job queue propagation or cleanup if the mc
5611
    # flag changed
5612
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5613
      self.context.ReaddNode(node)
5614

    
5615
    return result
5616

    
5617

    
5618
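# Illustrative sketch (not part of the original module): the role handling in
# LUNodeSetParams relies on a role-to-flags table similar to the assumed
# mapping below, where each role corresponds to a
# (master_candidate, drained, offline) tuple; it is shown here only to
# clarify how new_flags is unpacked in Exec above. The keys are hypothetical.
_EXAMPLE_ROLE_TO_FLAGS = {
  "candidate": (True, False, False),
  "drained": (False, True, False),
  "offline": (False, False, True),
  "regular": (False, False, False),
  }

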
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "master_netmask": cluster.master_netmask,
      "use_external_mip_script": cluster.use_external_mip_script,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


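# Illustrative sketch (not part of the original module): the os_hvp filtering
# in LUClusterQuery.Exec keeps only per-OS overrides for hypervisors that are
# enabled cluster-wide. With the hypothetical input below and only "kvm"
# enabled, the "xen-pvm" overrides would be dropped from the query result.
_EXAMPLE_OS_HVP_INPUT = {
  "debian-image": {
    "kvm": {"kernel_path": "/boot/vmlinuz"},
    "xen-pvm": {"kernel_args": "ro"},
    },
  }

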
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


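# Illustrative sketch (not part of the original module): a hypothetical caller
# of _AssembleInstanceDisks, showing how the (disks_ok, device_info) return
# value is typically consumed; the iteration over device_info mirrors what
# the activate-disks path reports back to the user.
def _ExampleLogAssembledDisks(lu, instance):
  """Assemble an instance's disks and log the resulting device paths."""
  disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
  if not disks_ok:
    raise errors.OpExecError("Cannot activate block devices")
  for node, iv_name, dev_path in device_info:
    lu.LogInfo("Disk %s of instance %s is %s on node %s",
               iv_name, instance.name, dev_path, node)
  return device_info

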
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceState(lu, instance, INSTANCE_DOWN, msg="cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


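# Illustrative sketch (not part of the original module): _ExpandCheckDisks is
# the common entry point for "act on a subset of disks" semantics. A
# hypothetical caller that only touches the first disk of an instance could
# look like this; passing None instead selects all disks.
def _ExampleFirstDiskOnly(instance):
  """Return a validated single-element disk list for an instance."""
  return _ExpandCheckDisks(instance, instance.disks[:1])

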
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored;
  otherwise they cause the function to report failure.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


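# Illustrative sketch (not part of the original module): a hypothetical
# prerequisite check that requires 2048 MiB of free memory on the primary
# node before starting an instance, in the same way
# LUInstanceStartup.CheckPrereq uses _CheckNodeFreeMemory with the filled
# BE_MAXMEM value.
def _ExampleCheckStartupMemory(lu, instance):
  """Verify the primary node has room for a hypothetical 2 GiB instance."""
  _CheckNodeFreeMemory(lu, instance.primary_node,
                       "starting instance %s" % instance.name,
                       2048, instance.hypervisor)

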
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
  """Checks if nodes have enough physical CPUs

  This function checks if all given nodes have the needed number of
  physical CPUs. In case any node has fewer CPUs or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the minimum acceptable number of physical CPUs
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    num_cpus = info.payload.get("cpu_total", None)
    if not isinstance(num_cpus, int):
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
                                 " on node %s, result was '%s'" %
                                 (node, num_cpus), errors.ECODE_ENVIRON)
    if requested > num_cpus:
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
                                 "required" % (node, num_cpus, requested),
                                 errors.ECODE_NORES)


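# Illustrative sketch (not part of the original module): the req_sizes mapping
# consumed by _CheckNodesFreeDiskPerVG goes from volume group name to the
# total MiB needed in that VG; the VG names below are hypothetical.
_EXAMPLE_REQ_SIZES = {
  "xenvg": 10240,            # 10 GiB of LVM space for instance data
  "metavg": DRBD_META_SIZE,  # a separate VG used only for DRBD metadata
  }

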
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      objects.UpgradeBeParams(self.op.beparams)
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    _CheckInstanceState(self, instance, INSTANCE_ONLINE)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MAXMEM], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    if not self.op.no_remember:
      self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = \
        self.rpc.call_instance_start(node_current,
                                     (instance, self.op.hvparams,
                                      self.op.beparams),
                                     self.op.startup_paused)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


  """Reboot an instance.
6242

6243
  """
6244
  HPATH = "instance-reboot"
6245
  HTYPE = constants.HTYPE_INSTANCE
6246
  REQ_BGL = False
6247

    
6248
  def ExpandNames(self):
6249
    self._ExpandAndLockInstance()
6250

    
6251
  def BuildHooksEnv(self):
6252
    """Build hooks env.
6253

6254
    This runs on master, primary and secondary nodes of the instance.
6255

6256
    """
6257
    env = {
6258
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6259
      "REBOOT_TYPE": self.op.reboot_type,
6260
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6261
      }
6262

    
6263
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6264

    
6265
    return env
6266

    
6267
  def BuildHooksNodes(self):
6268
    """Build hooks nodes.
6269

6270
    """
6271
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6272
    return (nl, nl)
6273

    
6274
  def CheckPrereq(self):
6275
    """Check prerequisites.
6276

6277
    This checks that the instance is in the cluster.
6278

6279
    """
6280
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6281
    assert self.instance is not None, \
6282
      "Cannot retrieve locked instance %s" % self.op.instance_name
6283
    _CheckInstanceState(self, instance, INSTANCE_ONLINE)
6284
    _CheckNodeOnline(self, instance.primary_node)
6285

    
6286
    # check bridges existence
6287
    _CheckInstanceBridgesExist(self, instance)
6288

    
6289
  def Exec(self, feedback_fn):
6290
    """Reboot the instance.
6291

6292
    """
6293
    instance = self.instance
6294
    ignore_secondaries = self.op.ignore_secondaries
6295
    reboot_type = self.op.reboot_type
6296

    
6297
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6298
                                              instance.name,
6299
                                              instance.hypervisor)
6300
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6301
    instance_running = bool(remote_info.payload)
6302

    
6303
    node_current = instance.primary_node
6304

    
6305
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6306
                                            constants.INSTANCE_REBOOT_HARD]:
6307
      for disk in instance.disks:
6308
        self.cfg.SetDiskID(disk, node_current)
6309
      result = self.rpc.call_instance_reboot(node_current, instance,
6310
                                             reboot_type,
6311
                                             self.op.shutdown_timeout)
6312
      result.Raise("Could not reboot instance")
6313
    else:
6314
      if instance_running:
6315
        result = self.rpc.call_instance_shutdown(node_current, instance,
6316
                                                 self.op.shutdown_timeout)
6317
        result.Raise("Could not shutdown instance for full reboot")
6318
        _ShutdownInstanceDisks(self, instance)
6319
      else:
6320
        self.LogInfo("Instance %s was already stopped, starting now",
6321
                     instance.name)
6322
      _StartInstanceDisks(self, instance, ignore_secondaries)
6323
      result = self.rpc.call_instance_start(node_current,
6324
                                            (instance, None, None), False)
6325
      msg = result.fail_msg
6326
      if msg:
6327
        _ShutdownInstanceDisks(self, instance)
6328
        raise errors.OpExecError("Could not start instance for"
6329
                                 " full reboot: %s" % msg)
6330

    
6331
    self.cfg.MarkInstanceUp(instance.name)
6332

    
6333

    
6334
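# Illustrative sketch (not part of the original module): the dispatch in
# LUInstanceReboot.Exec boils down to the decision below, written out as a
# small hypothetical helper for clarity; soft and hard reboots of a running
# instance are delegated to the node daemon, everything else becomes a
# stop-and-start cycle.
def _ExampleNeedsFullReboot(instance_running, reboot_type):
  """Return True when a reboot must be done as shutdown plus startup."""
  soft_or_hard = reboot_type in (constants.INSTANCE_REBOOT_SOFT,
                                 constants.INSTANCE_REBOOT_HARD)
  return not (instance_running and soft_or_hard)

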
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckInstanceState(self, self.instance, INSTANCE_ONLINE)

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    if not self.op.no_remember:
      self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceState(self, instance, INSTANCE_DOWN, msg="cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node,
                                             (inst, self.os_inst), True,
                                             self.op.debug_level)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # normalise the disk list
    self.op.disks = sorted(frozenset(self.op.disks))

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    if self.op.nodes:
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # if we replace the nodes, we only need to lock the old primary,
      # otherwise we need to lock all nodes for disk re-creation
      primary_only = bool(self.op.nodes)
      self._LockInstancesNodes(primary_only=primary_only)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    if self.op.nodes:
      if len(self.op.nodes) != len(instance.all_nodes):
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
                                   " %d replacement nodes were specified" %
                                   (instance.name, len(instance.all_nodes),
                                    len(self.op.nodes)),
                                   errors.ECODE_INVAL)
      assert instance.disk_template != constants.DT_DRBD8 or \
          len(self.op.nodes) == 2
      assert instance.disk_template != constants.DT_PLAIN or \
          len(self.op.nodes) == 1
      primary_node = self.op.nodes[0]
    else:
      primary_node = instance.primary_node
    _CheckNodeOnline(self, primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    # if we replace nodes *and* the old primary is offline, we don't
    # check
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
    if not (self.op.nodes and old_pnode.offline):
      _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                          msg="cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
      raise errors.OpPrereqError("Can't recreate disks partially and"
                                 " change the nodes at the same time",
                                 errors.ECODE_INVAL)
    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    instance = self.instance

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    to_skip = []
    mods = [] # keeps track of needed logical_id changes

    for idx, disk in enumerate(instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue
      # update secondaries for disks, if needed
      if self.op.nodes:
        if disk.dev_type == constants.LD_DRBD8:
          # need to update the nodes and minors
          assert len(self.op.nodes) == 2
          assert len(disk.logical_id) == 6 # otherwise disk internals
                                           # have changed
          (_, _, old_port, _, _, old_secret) = disk.logical_id
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
                    new_minors[0], new_minors[1], old_secret)
          assert len(disk.logical_id) == len(new_id)
          mods.append((idx, new_id))

    # now that we have passed all asserts above, we can apply the mods
    # in a single run (to avoid partial changes)
    for idx, new_id in mods:
      instance.disks[idx].logical_id = new_id

    # change primary node, if needed
    if self.op.nodes:
      instance.primary_node = self.op.nodes[0]
      self.LogWarning("Changing the instance's nodes, you will have to"
                      " remove any disks left on the older nodes manually")

    if self.op.nodes:
      self.cfg.Update(instance, feedback_fn)

    _CreateDisks(self, instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceState(self, instance, INSTANCE_NOT_RUNNING,
                        msg="cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      if hostname.name != new_name:
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                     hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in constants.DTS_FILEBASED and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return (nl, nl_post)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))
    assert not (set(instance.all_nodes) -
                self.owned_locks(locking.LEVEL_NODE)), \
      "Not owning correct locks"

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
                             self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.iq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.iq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.iq.OldStyleQuery(self)


class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.iallocator = getattr(self.op, "iallocator", None)
    self.target_node = getattr(self.op, "target_node", None)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    ignore_consistency = self.op.ignore_consistency
    shutdown_timeout = self.op.shutdown_timeout
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=False,
                                       failover=True,
                                       ignore_consistency=ignore_consistency,
                                       shutdown_timeout=shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      }

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""

    env.update(_BuildInstanceHookEnvByObject(self, instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       cleanup=self.op.cleanup,
                                       failover=False,
                                       fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
      if instance.disk_template in constants.DTS_EXT_MIRROR:
        if self.op.target_node is None:
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        else:
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                                   self.op.target_node]
        del self.recalculate_locks[locking.LEVEL_NODE]
      else:
        self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self.op.target_node
    env = _BuildInstanceHookEnvByObject(self, instance)
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      })

    if instance.disk_template in constants.DTS_INT_MIRROR:
      env["OLD_SECONDARY"] = target_node
      env["NEW_SECONDARY"] = source_node
    else:
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    instance = self._migrater.instance
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return (nl, nl + [instance.primary_node])


class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)
    elif level == locking.LEVEL_NODE_RES:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
7080
      ]
7081
    return (nl, nl)
7082

    
7083
  def CheckPrereq(self):
7084
    """Check prerequisites.
7085

7086
    This checks that the instance is in the cluster.
7087

7088
    """
7089
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7090
    assert self.instance is not None, \
7091
      "Cannot retrieve locked instance %s" % self.op.instance_name
7092

    
7093
    node = self.cfg.GetNodeInfo(self.op.target_node)
7094
    assert node is not None, \
7095
      "Cannot retrieve locked node %s" % self.op.target_node
7096

    
7097
    self.target_node = target_node = node.name
7098

    
7099
    if target_node == instance.primary_node:
7100
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7101
                                 (instance.name, target_node),
7102
                                 errors.ECODE_STATE)
7103

    
7104
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7105

    
7106
    for idx, dsk in enumerate(instance.disks):
7107
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7108
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7109
                                   " cannot copy" % idx, errors.ECODE_STATE)
7110

    
7111
    _CheckNodeOnline(self, target_node)
7112
    _CheckNodeNotDrained(self, target_node)
7113
    _CheckNodeVmCapable(self, target_node)
7114

    
7115
    if instance.admin_state == constants.ADMINST_UP:
7116
      # check memory requirements on the secondary node
7117
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7118
                           instance.name, bep[constants.BE_MAXMEM],
7119
                           instance.hypervisor)
7120
    else:
7121
      self.LogInfo("Not checking memory on the secondary node as"
7122
                   " instance will not be started")
7123

    
7124
    # check bridge existance
7125
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7126

    
7127
  def Exec(self, feedback_fn):
7128
    """Move an instance.
7129

7130
    The move is done by shutting it down on its present node, copying
7131
    the data over (slow) and starting it on the new node.
7132

7133
    """
7134
    instance = self.instance
7135

    
7136
    source_node = instance.primary_node
7137
    target_node = self.target_node
7138

    
7139
    self.LogInfo("Shutting down instance %s on source node %s",
7140
                 instance.name, source_node)
7141

    
7142
    assert (self.owned_locks(locking.LEVEL_NODE) ==
7143
            self.owned_locks(locking.LEVEL_NODE_RES))
7144

    
7145
    result = self.rpc.call_instance_shutdown(source_node, instance,
7146
                                             self.op.shutdown_timeout)
7147
    msg = result.fail_msg
7148
    if msg:
7149
      if self.op.ignore_consistency:
7150
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7151
                             " Proceeding anyway. Please make sure node"
7152
                             " %s is down. Error details: %s",
7153
                             instance.name, source_node, source_node, msg)
7154
      else:
7155
        raise errors.OpExecError("Could not shutdown instance %s on"
7156
                                 " node %s: %s" %
7157
                                 (instance.name, source_node, msg))
7158

    
7159
    # create the target disks
7160
    try:
7161
      _CreateDisks(self, instance, target_node=target_node)
7162
    except errors.OpExecError:
7163
      self.LogWarning("Device creation failed, reverting...")
7164
      try:
7165
        _RemoveDisks(self, instance, target_node=target_node)
7166
      finally:
7167
        self.cfg.ReleaseDRBDMinors(instance.name)
7168
        raise
7169

    
7170
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7171

    
7172
    errs = []
7173
    # activate, get path, copy the data over
7174
    for idx, disk in enumerate(instance.disks):
7175
      self.LogInfo("Copying data for disk %d", idx)
7176
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7177
                                               instance.name, True, idx)
7178
      if result.fail_msg:
7179
        self.LogWarning("Can't assemble newly created disk %d: %s",
7180
                        idx, result.fail_msg)
7181
        errs.append(result.fail_msg)
7182
        break
7183
      dev_path = result.payload
7184
      result = self.rpc.call_blockdev_export(source_node, disk,
7185
                                             target_node, dev_path,
7186
                                             cluster_name)
7187
      if result.fail_msg:
7188
        self.LogWarning("Can't copy data over for disk %d: %s",
7189
                        idx, result.fail_msg)
7190
        errs.append(result.fail_msg)
7191
        break
7192

    
7193
    if errs:
7194
      self.LogWarning("Some disks failed to copy, aborting")
7195
      try:
7196
        _RemoveDisks(self, instance, target_node=target_node)
7197
      finally:
7198
        self.cfg.ReleaseDRBDMinors(instance.name)
7199
        raise errors.OpExecError("Errors during disk copy: %s" %
7200
                                 (",".join(errs),))
7201

    
7202
    instance.primary_node = target_node
7203
    self.cfg.Update(instance, feedback_fn)
7204

    
7205
    self.LogInfo("Removing the disks on the original node")
7206
    _RemoveDisks(self, instance, target_node=source_node)
7207

    
7208
    # Only start the instance if it's marked as up
7209
    if instance.admin_state == constants.ADMINST_UP:
7210
      self.LogInfo("Starting instance %s on node %s",
7211
                   instance.name, target_node)
7212

    
7213
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7214
                                           ignore_secondaries=True)
7215
      if not disks_ok:
7216
        _ShutdownInstanceDisks(self, instance)
7217
        raise errors.OpExecError("Can't activate the instance's disks")
7218

    
7219
      result = self.rpc.call_instance_start(target_node,
7220
                                            (instance, None, None), False)
7221
      msg = result.fail_msg
7222
      if msg:
7223
        _ShutdownInstanceDisks(self, instance)
7224
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7225
                                 (instance.name, target_node, msg))
7226

    
7227

    
7228
class LUNodeMigrate(LogicalUnit):
7229
  """Migrate all instances from a node.
7230

7231
  """
7232
  HPATH = "node-migrate"
7233
  HTYPE = constants.HTYPE_NODE
7234
  REQ_BGL = False
7235

    
7236
  def CheckArguments(self):
7237
    pass
7238

    
7239
  def ExpandNames(self):
7240
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7241

    
7242
    self.share_locks = _ShareAll()
7243
    self.needed_locks = {
7244
      locking.LEVEL_NODE: [self.op.node_name],
7245
      }
7246

    
7247
  def BuildHooksEnv(self):
7248
    """Build hooks env.
7249

7250
    This runs on the master, the primary and all the secondaries.
7251

7252
    """
7253
    return {
7254
      "NODE_NAME": self.op.node_name,
7255
      }
7256

    
7257
  def BuildHooksNodes(self):
7258
    """Build hooks nodes.
7259

7260
    """
7261
    nl = [self.cfg.GetMasterNode()]
7262
    return (nl, nl)
7263

    
7264
  def CheckPrereq(self):
7265
    pass
7266

    
7267
  def Exec(self, feedback_fn):
7268
    # Prepare jobs for migration instances
7269
    jobs = [
7270
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7271
                                 mode=self.op.mode,
7272
                                 live=self.op.live,
7273
                                 iallocator=self.op.iallocator,
7274
                                 target_node=self.op.target_node)]
7275
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7276
      ]
7277

    
7278
    # TODO: Run iallocator in this opcode and pass correct placement options to
7279
    # OpInstanceMigrate. Since other jobs can modify the cluster between
7280
    # running the iallocator and the actual migration, a good consistency model
7281
    # will have to be found.
7282

    
7283
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
7284
            frozenset([self.op.node_name]))
7285

    
7286
    return ResultWithJobs(jobs)
7287

    
7288

    
7289
class TLMigrateInstance(Tasklet):
7290
  """Tasklet class for instance migration.
7291

7292
  @type live: boolean
7293
  @ivar live: whether the migration will be done live or non-live;
7294
      this variable is initalized only after CheckPrereq has run
7295
  @type cleanup: boolean
7296
  @ivar cleanup: Wheater we cleanup from a failed migration
7297
  @type iallocator: string
7298
  @ivar iallocator: The iallocator used to determine target_node
7299
  @type target_node: string
7300
  @ivar target_node: If given, the target_node to reallocate the instance to
7301
  @type failover: boolean
7302
  @ivar failover: Whether operation results in failover or migration
7303
  @type fallback: boolean
7304
  @ivar fallback: Whether fallback to failover is allowed if migration not
7305
                  possible
7306
  @type ignore_consistency: boolean
7307
  @ivar ignore_consistency: Wheter we should ignore consistency between source
7308
                            and target node
7309
  @type shutdown_timeout: int
7310
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
7311

7312
  """
7313

    
7314
  # Constants
7315
  _MIGRATION_POLL_INTERVAL = 1      # seconds
7316
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
7317

    
7318
  def __init__(self, lu, instance_name, cleanup=False,
7319
               failover=False, fallback=False,
7320
               ignore_consistency=False,
7321
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
7322
    """Initializes this class.
7323

7324
    """
7325
    Tasklet.__init__(self, lu)
7326

    
7327
    # Parameters
7328
    self.instance_name = instance_name
7329
    self.cleanup = cleanup
7330
    self.live = False # will be overridden later
7331
    self.failover = failover
7332
    self.fallback = fallback
7333
    self.ignore_consistency = ignore_consistency
7334
    self.shutdown_timeout = shutdown_timeout
7335

    
7336
  def CheckPrereq(self):
7337
    """Check prerequisites.
7338

7339
    This checks that the instance is in the cluster.
7340

7341
    """
7342
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7343
    instance = self.cfg.GetInstanceInfo(instance_name)
7344
    assert instance is not None
7345
    self.instance = instance
7346

    
7347
    if (not self.cleanup and
7348
        not instance.admin_state == constants.ADMINST_UP and
7349
        not self.failover and self.fallback):
7350
      self.lu.LogInfo("Instance is marked down or offline, fallback allowed,"
7351
                      " switching to failover")
7352
      self.failover = True
7353

    
7354
    if instance.disk_template not in constants.DTS_MIRRORED:
7355
      if self.failover:
7356
        text = "failovers"
7357
      else:
7358
        text = "migrations"
7359
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7360
                                 " %s" % (instance.disk_template, text),
7361
                                 errors.ECODE_STATE)
7362

    
7363
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7364
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7365

    
7366
      if self.lu.op.iallocator:
7367
        self._RunAllocator()
7368
      else:
7369
        # We set set self.target_node as it is required by
7370
        # BuildHooksEnv
7371
        self.target_node = self.lu.op.target_node
7372

    
7373
      # self.target_node is already populated, either directly or by the
7374
      # iallocator run
7375
      target_node = self.target_node
7376
      if self.target_node == instance.primary_node:
7377
        raise errors.OpPrereqError("Cannot migrate instance %s"
7378
                                   " to its primary (%s)" %
7379
                                   (instance.name, instance.primary_node))
7380

    
7381
      if len(self.lu.tasklets) == 1:
7382
        # It is safe to release locks only when we're the only tasklet
7383
        # in the LU
7384
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7385
                      keep=[instance.primary_node, self.target_node])
7386

    
7387
    else:
7388
      secondary_nodes = instance.secondary_nodes
7389
      if not secondary_nodes:
7390
        raise errors.ConfigurationError("No secondary node but using"
7391
                                        " %s disk template" %
7392
                                        instance.disk_template)
7393
      target_node = secondary_nodes[0]
7394
      if self.lu.op.iallocator or (self.lu.op.target_node and
7395
                                   self.lu.op.target_node != target_node):
7396
        if self.failover:
7397
          text = "failed over"
7398
        else:
7399
          text = "migrated"
7400
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7401
                                   " be %s to arbitrary nodes"
7402
                                   " (neither an iallocator nor a target"
7403
                                   " node can be passed)" %
7404
                                   (instance.disk_template, text),
7405
                                   errors.ECODE_INVAL)
7406

    
7407
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7408

    
7409
    # check memory requirements on the secondary node
7410
    if not self.failover or instance.admin_state == constants.ADMINST_UP:
7411
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7412
                           instance.name, i_be[constants.BE_MAXMEM],
7413
                           instance.hypervisor)
7414
    else:
7415
      self.lu.LogInfo("Not checking memory on the secondary node as"
7416
                      " instance will not be started")
7417

    
7418
    # check bridge existance
7419
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7420

    
7421
    if not self.cleanup:
7422
      _CheckNodeNotDrained(self.lu, target_node)
7423
      if not self.failover:
7424
        result = self.rpc.call_instance_migratable(instance.primary_node,
7425
                                                   instance)
7426
        if result.fail_msg and self.fallback:
7427
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7428
                          " failover")
7429
          self.failover = True
7430
        else:
7431
          result.Raise("Can't migrate, please use failover",
7432
                       prereq=True, ecode=errors.ECODE_STATE)
7433

    
7434
    assert not (self.failover and self.cleanup)
7435

    
7436
    if not self.failover:
7437
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7438
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7439
                                   " parameters are accepted",
7440
                                   errors.ECODE_INVAL)
7441
      if self.lu.op.live is not None:
7442
        if self.lu.op.live:
7443
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7444
        else:
7445
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7446
        # reset the 'live' parameter to None so that repeated
7447
        # invocations of CheckPrereq do not raise an exception
7448
        self.lu.op.live = None
7449
      elif self.lu.op.mode is None:
7450
        # read the default value from the hypervisor
7451
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7452
                                                skip_globals=False)
7453
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7454

    
7455
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7456
    else:
7457
      # Failover is never live
7458
      self.live = False
7459

    
7460
  def _RunAllocator(self):
7461
    """Run the allocator based on input opcode.
7462

7463
    """
7464
    ial = IAllocator(self.cfg, self.rpc,
7465
                     mode=constants.IALLOCATOR_MODE_RELOC,
7466
                     name=self.instance_name,
7467
                     # TODO See why hail breaks with a single node below
7468
                     relocate_from=[self.instance.primary_node,
7469
                                    self.instance.primary_node],
7470
                     )
7471

    
7472
    ial.Run(self.lu.op.iallocator)
7473

    
7474
    if not ial.success:
7475
      raise errors.OpPrereqError("Can't compute nodes using"
7476
                                 " iallocator '%s': %s" %
7477
                                 (self.lu.op.iallocator, ial.info),
7478
                                 errors.ECODE_NORES)
7479
    if len(ial.result) != ial.required_nodes:
7480
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7481
                                 " of nodes (%s), required %s" %
7482
                                 (self.lu.op.iallocator, len(ial.result),
7483
                                  ial.required_nodes), errors.ECODE_FAULT)
7484
    self.target_node = ial.result[0]
7485
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7486
                 self.instance_name, self.lu.op.iallocator,
7487
                 utils.CommaJoin(ial.result))
7488

    
7489
  def _WaitUntilSync(self):
7490
    """Poll with custom rpc for disk sync.
7491

7492
    This uses our own step-based rpc call.
7493

7494
    """
7495
    self.feedback_fn("* wait until resync is done")
7496
    all_done = False
7497
    while not all_done:
7498
      all_done = True
7499
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7500
                                            self.nodes_ip,
7501
                                            self.instance.disks)
7502
      min_percent = 100
7503
      for node, nres in result.items():
7504
        nres.Raise("Cannot resync disks on node %s" % node)
7505
        node_done, node_percent = nres.payload
7506
        all_done = all_done and node_done
7507
        if node_percent is not None:
7508
          min_percent = min(min_percent, node_percent)
7509
      if not all_done:
7510
        if min_percent < 100:
7511
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7512
        time.sleep(2)
7513

    
7514
  def _EnsureSecondary(self, node):
7515
    """Demote a node to secondary.
7516

7517
    """
7518
    self.feedback_fn("* switching node %s to secondary mode" % node)
7519

    
7520
    for dev in self.instance.disks:
7521
      self.cfg.SetDiskID(dev, node)
7522

    
7523
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7524
                                          self.instance.disks)
7525
    result.Raise("Cannot change disk to secondary on node %s" % node)
7526

    
7527
  def _GoStandalone(self):
7528
    """Disconnect from the network.
7529

7530
    """
7531
    self.feedback_fn("* changing into standalone mode")
7532
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7533
                                               self.instance.disks)
7534
    for node, nres in result.items():
7535
      nres.Raise("Cannot disconnect disks node %s" % node)
7536

    
7537
  def _GoReconnect(self, multimaster):
7538
    """Reconnect to the network.
7539

7540
    """
7541
    if multimaster:
7542
      msg = "dual-master"
7543
    else:
7544
      msg = "single-master"
7545
    self.feedback_fn("* changing disks into %s mode" % msg)
7546
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7547
                                           self.instance.disks,
7548
                                           self.instance.name, multimaster)
7549
    for node, nres in result.items():
7550
      nres.Raise("Cannot change disks config on node %s" % node)
7551

    
7552
  def _ExecCleanup(self):
7553
    """Try to cleanup after a failed migration.
7554

7555
    The cleanup is done by:
7556
      - check that the instance is running only on one node
7557
        (and update the config if needed)
7558
      - change disks on its secondary node to secondary
7559
      - wait until disks are fully synchronized
7560
      - disconnect from the network
7561
      - change disks into single-master mode
7562
      - wait again until disks are fully synchronized
7563

7564
    """
7565
    instance = self.instance
7566
    target_node = self.target_node
7567
    source_node = self.source_node
7568

    
7569
    # check running on only one node
7570
    self.feedback_fn("* checking where the instance actually runs"
7571
                     " (if this hangs, the hypervisor might be in"
7572
                     " a bad state)")
7573
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7574
    for node, result in ins_l.items():
7575
      result.Raise("Can't contact node %s" % node)
7576

    
7577
    runningon_source = instance.name in ins_l[source_node].payload
7578
    runningon_target = instance.name in ins_l[target_node].payload
7579

    
7580
    if runningon_source and runningon_target:
7581
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7582
                               " or the hypervisor is confused; you will have"
7583
                               " to ensure manually that it runs only on one"
7584
                               " and restart this operation")
7585

    
7586
    if not (runningon_source or runningon_target):
7587
      raise errors.OpExecError("Instance does not seem to be running at all;"
7588
                               " in this case it's safer to repair by"
7589
                               " running 'gnt-instance stop' to ensure disk"
7590
                               " shutdown, and then restarting it")
7591

    
7592
    if runningon_target:
7593
      # the migration has actually succeeded, we need to update the config
7594
      self.feedback_fn("* instance running on secondary node (%s),"
7595
                       " updating config" % target_node)
7596
      instance.primary_node = target_node
7597
      self.cfg.Update(instance, self.feedback_fn)
7598
      demoted_node = source_node
7599
    else:
7600
      self.feedback_fn("* instance confirmed to be running on its"
7601
                       " primary node (%s)" % source_node)
7602
      demoted_node = target_node
7603

    
7604
    if instance.disk_template in constants.DTS_INT_MIRROR:
7605
      self._EnsureSecondary(demoted_node)
7606
      try:
7607
        self._WaitUntilSync()
7608
      except errors.OpExecError:
7609
        # we ignore here errors, since if the device is standalone, it
7610
        # won't be able to sync
7611
        pass
7612
      self._GoStandalone()
7613
      self._GoReconnect(False)
7614
      self._WaitUntilSync()
7615

    
7616
    self.feedback_fn("* done")
7617

    
7618
  def _RevertDiskStatus(self):
7619
    """Try to revert the disk status after a failed migration.
7620

7621
    """
7622
    target_node = self.target_node
7623
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7624
      return
7625

    
7626
    try:
7627
      self._EnsureSecondary(target_node)
7628
      self._GoStandalone()
7629
      self._GoReconnect(False)
7630
      self._WaitUntilSync()
7631
    except errors.OpExecError, err:
7632
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7633
                         " please try to recover the instance manually;"
7634
                         " error '%s'" % str(err))
7635

    
7636
  def _AbortMigration(self):
7637
    """Call the hypervisor code to abort a started migration.
7638

7639
    """
7640
    instance = self.instance
7641
    target_node = self.target_node
7642
    source_node = self.source_node
7643
    migration_info = self.migration_info
7644

    
7645
    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
7646
                                                                 instance,
7647
                                                                 migration_info,
7648
                                                                 False)
7649
    abort_msg = abort_result.fail_msg
7650
    if abort_msg:
7651
      logging.error("Aborting migration failed on target node %s: %s",
7652
                    target_node, abort_msg)
7653
      # Don't raise an exception here, as we stil have to try to revert the
7654
      # disk status, even if this step failed.
7655

    
7656
    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
7657
        instance, False, self.live)
7658
    abort_msg = abort_result.fail_msg
7659
    if abort_msg:
7660
      logging.error("Aborting migration failed on source node %s: %s",
7661
                    source_node, abort_msg)
7662

    
7663
  def _ExecMigration(self):
7664
    """Migrate an instance.
7665

7666
    The migrate is done by:
7667
      - change the disks into dual-master mode
7668
      - wait until disks are fully synchronized again
7669
      - migrate the instance
7670
      - change disks on the new secondary node (the old primary) to secondary
7671
      - wait until disks are fully synchronized
7672
      - change disks into single-master mode
7673

7674
    """
7675
    instance = self.instance
7676
    target_node = self.target_node
7677
    source_node = self.source_node
7678

    
7679
    # Check for hypervisor version mismatch and warn the user.
7680
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
7681
                                       None, self.instance.hypervisor)
7682
    src_info = nodeinfo[source_node]
7683
    dst_info = nodeinfo[target_node]
7684

    
7685
    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
7686
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
7687
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7688
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
7689
      if src_version != dst_version:
7690
        self.feedback_fn("* warning: hypervisor version mismatch between"
7691
                         " source (%s) and target (%s) node" %
7692
                         (src_version, dst_version))
7693

    
7694
    self.feedback_fn("* checking disk consistency between source and target")
7695
    for dev in instance.disks:
7696
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7697
        raise errors.OpExecError("Disk %s is degraded or not fully"
7698
                                 " synchronized on target node,"
7699
                                 " aborting migration" % dev.iv_name)
7700

    
7701
    # First get the migration information from the remote node
7702
    result = self.rpc.call_migration_info(source_node, instance)
7703
    msg = result.fail_msg
7704
    if msg:
7705
      log_err = ("Failed fetching source migration information from %s: %s" %
7706
                 (source_node, msg))
7707
      logging.error(log_err)
7708
      raise errors.OpExecError(log_err)
7709

    
7710
    self.migration_info = migration_info = result.payload
7711

    
7712
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7713
      # Then switch the disks to master/master mode
7714
      self._EnsureSecondary(target_node)
7715
      self._GoStandalone()
7716
      self._GoReconnect(True)
7717
      self._WaitUntilSync()
7718

    
7719
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7720
    result = self.rpc.call_accept_instance(target_node,
7721
                                           instance,
7722
                                           migration_info,
7723
                                           self.nodes_ip[target_node])
7724

    
7725
    msg = result.fail_msg
7726
    if msg:
7727
      logging.error("Instance pre-migration failed, trying to revert"
7728
                    " disk status: %s", msg)
7729
      self.feedback_fn("Pre-migration failed, aborting")
7730
      self._AbortMigration()
7731
      self._RevertDiskStatus()
7732
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7733
                               (instance.name, msg))
7734

    
7735
    self.feedback_fn("* migrating instance to %s" % target_node)
7736
    result = self.rpc.call_instance_migrate(source_node, instance,
7737
                                            self.nodes_ip[target_node],
7738
                                            self.live)
7739
    msg = result.fail_msg
7740
    if msg:
7741
      logging.error("Instance migration failed, trying to revert"
7742
                    " disk status: %s", msg)
7743
      self.feedback_fn("Migration failed, aborting")
7744
      self._AbortMigration()
7745
      self._RevertDiskStatus()
7746
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7747
                               (instance.name, msg))
7748

    
7749
    self.feedback_fn("* starting memory transfer")
7750
    last_feedback = time.time()
7751
    while True:
7752
      result = self.rpc.call_instance_get_migration_status(source_node,
7753
                                                           instance)
7754
      msg = result.fail_msg
7755
      ms = result.payload   # MigrationStatus instance
7756
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
7757
        logging.error("Instance migration failed, trying to revert"
7758
                      " disk status: %s", msg)
7759
        self.feedback_fn("Migration failed, aborting")
7760
        self._AbortMigration()
7761
        self._RevertDiskStatus()
7762
        raise errors.OpExecError("Could not migrate instance %s: %s" %
7763
                                 (instance.name, msg))
7764

    
7765
      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
7766
        self.feedback_fn("* memory transfer complete")
7767
        break
7768

    
7769
      if (utils.TimeoutExpired(last_feedback,
7770
                               self._MIGRATION_FEEDBACK_INTERVAL) and
7771
          ms.transferred_ram is not None):
7772
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
7773
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
7774
        last_feedback = time.time()
7775

    
7776
      time.sleep(self._MIGRATION_POLL_INTERVAL)
7777

    
7778
    result = self.rpc.call_instance_finalize_migration_src(source_node,
7779
                                                           instance,
7780
                                                           True,
7781
                                                           self.live)
7782
    msg = result.fail_msg
7783
    if msg:
7784
      logging.error("Instance migration succeeded, but finalization failed"
7785
                    " on the source node: %s", msg)
7786
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7787
                               msg)
7788

    
7789
    instance.primary_node = target_node
7790

    
7791
    # distribute new instance config to the other nodes
7792
    self.cfg.Update(instance, self.feedback_fn)
7793

    
7794
    result = self.rpc.call_instance_finalize_migration_dst(target_node,
7795
                                                           instance,
7796
                                                           migration_info,
7797
                                                           True)
7798
    msg = result.fail_msg
7799
    if msg:
7800
      logging.error("Instance migration succeeded, but finalization failed"
7801
                    " on the target node: %s", msg)
7802
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7803
                               msg)
7804

    
7805
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7806
      self._EnsureSecondary(source_node)
7807
      self._WaitUntilSync()
7808
      self._GoStandalone()
7809
      self._GoReconnect(False)
7810
      self._WaitUntilSync()
7811

    
7812
    self.feedback_fn("* done")
7813

    
7814
  def _ExecFailover(self):
7815
    """Failover an instance.
7816

7817
    The failover is done by shutting it down on its present node and
7818
    starting it on the secondary.
7819

7820
    """
7821
    instance = self.instance
7822
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7823

    
7824
    source_node = instance.primary_node
7825
    target_node = self.target_node
7826

    
7827
    if instance.admin_state == constants.ADMINST_UP:
7828
      self.feedback_fn("* checking disk consistency between source and target")
7829
      for dev in instance.disks:
7830
        # for drbd, these are drbd over lvm
7831
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7832
          if primary_node.offline:
7833
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7834
                             " target node %s" %
7835
                             (primary_node.name, dev.iv_name, target_node))
7836
          elif not self.ignore_consistency:
7837
            raise errors.OpExecError("Disk %s is degraded on target node,"
7838
                                     " aborting failover" % dev.iv_name)
7839
    else:
7840
      self.feedback_fn("* not checking disk consistency as instance is not"
7841
                       " running")
7842

    
7843
    self.feedback_fn("* shutting down instance on source node")
7844
    logging.info("Shutting down instance %s on node %s",
7845
                 instance.name, source_node)
7846

    
7847
    result = self.rpc.call_instance_shutdown(source_node, instance,
7848
                                             self.shutdown_timeout)
7849
    msg = result.fail_msg
7850
    if msg:
7851
      if self.ignore_consistency or primary_node.offline:
7852
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7853
                           " proceeding anyway; please make sure node"
7854
                           " %s is down; error details: %s",
7855
                           instance.name, source_node, source_node, msg)
7856
      else:
7857
        raise errors.OpExecError("Could not shutdown instance %s on"
7858
                                 " node %s: %s" %
7859
                                 (instance.name, source_node, msg))
7860

    
7861
    self.feedback_fn("* deactivating the instance's disks on source node")
7862
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7863
      raise errors.OpExecError("Can't shut down the instance's disks")
7864

    
7865
    instance.primary_node = target_node
7866
    # distribute new instance config to the other nodes
7867
    self.cfg.Update(instance, self.feedback_fn)
7868

    
7869
    # Only start the instance if it's marked as up
7870
    if instance.admin_state == constants.ADMINST_UP:
7871
      self.feedback_fn("* activating the instance's disks on target node %s" %
7872
                       target_node)
7873
      logging.info("Starting instance %s on node %s",
7874
                   instance.name, target_node)
7875

    
7876
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7877
                                           ignore_secondaries=True)
7878
      if not disks_ok:
7879
        _ShutdownInstanceDisks(self.lu, instance)
7880
        raise errors.OpExecError("Can't activate the instance's disks")
7881

    
7882
      self.feedback_fn("* starting the instance on the target node %s" %
7883
                       target_node)
7884
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
7885
                                            False)
7886
      msg = result.fail_msg
7887
      if msg:
7888
        _ShutdownInstanceDisks(self.lu, instance)
7889
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7890
                                 (instance.name, target_node, msg))
7891

    
7892
  def Exec(self, feedback_fn):
7893
    """Perform the migration.
7894

7895
    """
7896
    self.feedback_fn = feedback_fn
7897
    self.source_node = self.instance.primary_node
7898

    
7899
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7900
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7901
      self.target_node = self.instance.secondary_nodes[0]
7902
      # Otherwise self.target_node has been populated either
7903
      # directly, or through an iallocator.
7904

    
7905
    self.all_nodes = [self.source_node, self.target_node]
7906
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7907
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7908

    
7909
    if self.failover:
7910
      feedback_fn("Failover instance %s" % self.instance.name)
7911
      self._ExecFailover()
7912
    else:
7913
      feedback_fn("Migrating instance %s" % self.instance.name)
7914

    
7915
      if self.cleanup:
7916
        return self._ExecCleanup()
7917
      else:
7918
        return self._ExecMigration()
7919

    
7920

    
7921
def _CreateBlockDev(lu, node, instance, device, force_create,
7922
                    info, force_open):
7923
  """Create a tree of block devices on a given node.
7924

7925
  If this device type has to be created on secondaries, create it and
7926
  all its children.
7927

7928
  If not, just recurse to children keeping the same 'force' value.
7929

7930
  @param lu: the lu on whose behalf we execute
7931
  @param node: the node on which to create the device
7932
  @type instance: L{objects.Instance}
7933
  @param instance: the instance which owns the device
7934
  @type device: L{objects.Disk}
7935
  @param device: the device to create
7936
  @type force_create: boolean
7937
  @param force_create: whether to force creation of this device; this
7938
      will be change to True whenever we find a device which has
7939
      CreateOnSecondary() attribute
7940
  @param info: the extra 'metadata' we should attach to the device
7941
      (this will be represented as a LVM tag)
7942
  @type force_open: boolean
7943
  @param force_open: this parameter will be passes to the
7944
      L{backend.BlockdevCreate} function where it specifies
7945
      whether we run on primary or not, and it affects both
7946
      the child assembly and the device own Open() execution
7947

7948
  """
7949
  if device.CreateOnSecondary():
7950
    force_create = True
7951

    
7952
  if device.children:
7953
    for child in device.children:
7954
      _CreateBlockDev(lu, node, instance, child, force_create,
7955
                      info, force_open)
7956

    
7957
  if not force_create:
7958
    return
7959

    
7960
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7961

    
7962

    
7963
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7964
  """Create a single block device on a given node.
7965

7966
  This will not recurse over children of the device, so they must be
7967
  created in advance.
7968

7969
  @param lu: the lu on whose behalf we execute
7970
  @param node: the node on which to create the device
7971
  @type instance: L{objects.Instance}
7972
  @param instance: the instance which owns the device
7973
  @type device: L{objects.Disk}
7974
  @param device: the device to create
7975
  @param info: the extra 'metadata' we should attach to the device
7976
      (this will be represented as a LVM tag)
7977
  @type force_open: boolean
7978
  @param force_open: this parameter will be passes to the
7979
      L{backend.BlockdevCreate} function where it specifies
7980
      whether we run on primary or not, and it affects both
7981
      the child assembly and the device own Open() execution
7982

7983
  """
7984
  lu.cfg.SetDiskID(device, node)
7985
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7986
                                       instance.name, force_open, info)
7987
  result.Raise("Can't create block device %s on"
7988
               " node %s for instance %s" % (device, node, instance.name))
7989
  if device.physical_id is None:
7990
    device.physical_id = result.payload
7991

    
7992

    
7993
def _GenerateUniqueNames(lu, exts):
7994
  """Generate a suitable LV name.
7995

7996
  This will generate a logical volume name for the given instance.
7997

7998
  """
7999
  results = []
8000
  for val in exts:
8001
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
8002
    results.append("%s%s" % (new_id, val))
8003
  return results
8004

    
8005

    
8006
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
8007
                         iv_name, p_minor, s_minor):
8008
  """Generate a drbd8 device complete with its children.
8009

8010
  """
8011
  assert len(vgnames) == len(names) == 2
8012
  port = lu.cfg.AllocatePort()
8013
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
8014
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
8015
                          logical_id=(vgnames[0], names[0]))
8016
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
8017
                          logical_id=(vgnames[1], names[1]))
8018
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
8019
                          logical_id=(primary, secondary, port,
8020
                                      p_minor, s_minor,
8021
                                      shared_secret),
8022
                          children=[dev_data, dev_meta],
8023
                          iv_name=iv_name)
8024
  return drbd_dev
8025

    
8026

    
8027
def _GenerateDiskTemplate(lu, template_name,
8028
                          instance_name, primary_node,
8029
                          secondary_nodes, disk_info,
8030
                          file_storage_dir, file_driver,
8031
                          base_index, feedback_fn):
8032
  """Generate the entire disk layout for a given template type.
8033

8034
  """
8035
  #TODO: compute space requirements
8036

    
8037
  vgname = lu.cfg.GetVGName()
8038
  disk_count = len(disk_info)
8039
  disks = []
8040
  if template_name == constants.DT_DISKLESS:
8041
    pass
8042
  elif template_name == constants.DT_PLAIN:
8043
    if len(secondary_nodes) != 0:
8044
      raise errors.ProgrammerError("Wrong template configuration")
8045

    
8046
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8047
                                      for i in range(disk_count)])
8048
    for idx, disk in enumerate(disk_info):
8049
      disk_index = idx + base_index
8050
      vg = disk.get(constants.IDISK_VG, vgname)
8051
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
8052
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
8053
                              size=disk[constants.IDISK_SIZE],
8054
                              logical_id=(vg, names[idx]),
8055
                              iv_name="disk/%d" % disk_index,
8056
                              mode=disk[constants.IDISK_MODE])
8057
      disks.append(disk_dev)
8058
  elif template_name == constants.DT_DRBD8:
8059
    if len(secondary_nodes) != 1:
8060
      raise errors.ProgrammerError("Wrong template configuration")
8061
    remote_node = secondary_nodes[0]
8062
    minors = lu.cfg.AllocateDRBDMinor(
8063
      [primary_node, remote_node] * len(disk_info), instance_name)
8064

    
8065
    names = []
8066
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
8067
                                               for i in range(disk_count)]):
8068
      names.append(lv_prefix + "_data")
8069
      names.append(lv_prefix + "_meta")
8070
    for idx, disk in enumerate(disk_info):
8071
      disk_index = idx + base_index
8072
      data_vg = disk.get(constants.IDISK_VG, vgname)
8073
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
8074
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
8075
                                      disk[constants.IDISK_SIZE],
8076
                                      [data_vg, meta_vg],
8077
                                      names[idx * 2:idx * 2 + 2],
8078
                                      "disk/%d" % disk_index,
8079
                                      minors[idx * 2], minors[idx * 2 + 1])
8080
      disk_dev.mode = disk[constants.IDISK_MODE]
8081
      disks.append(disk_dev)
8082
  elif template_name == constants.DT_FILE:
8083
    if len(secondary_nodes) != 0:
8084
      raise errors.ProgrammerError("Wrong template configuration")
8085

    
8086
    opcodes.RequireFileStorage()
8087

    
8088
    for idx, disk in enumerate(disk_info):
8089
      disk_index = idx + base_index
8090
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8091
                              size=disk[constants.IDISK_SIZE],
8092
                              iv_name="disk/%d" % disk_index,
8093
                              logical_id=(file_driver,
8094
                                          "%s/disk%d" % (file_storage_dir,
8095
                                                         disk_index)),
8096
                              mode=disk[constants.IDISK_MODE])
8097
      disks.append(disk_dev)
8098
  elif template_name == constants.DT_SHARED_FILE:
8099
    if len(secondary_nodes) != 0:
8100
      raise errors.ProgrammerError("Wrong template configuration")
8101

    
8102
    opcodes.RequireSharedFileStorage()
8103

    
8104
    for idx, disk in enumerate(disk_info):
8105
      disk_index = idx + base_index
8106
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
8107
                              size=disk[constants.IDISK_SIZE],
8108
                              iv_name="disk/%d" % disk_index,
8109
                              logical_id=(file_driver,
8110
                                          "%s/disk%d" % (file_storage_dir,
8111
                                                         disk_index)),
8112
                              mode=disk[constants.IDISK_MODE])
8113
      disks.append(disk_dev)
8114
  elif template_name == constants.DT_BLOCK:
8115
    if len(secondary_nodes) != 0:
8116
      raise errors.ProgrammerError("Wrong template configuration")
8117

    
8118
    for idx, disk in enumerate(disk_info):
8119
      disk_index = idx + base_index
8120
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
8121
                              size=disk[constants.IDISK_SIZE],
8122
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
8123
                                          disk[constants.IDISK_ADOPT]),
8124
                              iv_name="disk/%d" % disk_index,
8125
                              mode=disk[constants.IDISK_MODE])
8126
      disks.append(disk_dev)
8127

    
8128
  else:
8129
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
8130
  return disks
8131

    
8132

    
8133
def _GetInstanceInfoText(instance):
8134
  """Compute that text that should be added to the disk's metadata.
8135

8136
  """
8137
  return "originstname+%s" % instance.name
8138

    
8139

    
8140
def _CalcEta(time_taken, written, total_size):
8141
  """Calculates the ETA based on size written and total size.
8142

8143
  @param time_taken: The time taken so far
8144
  @param written: amount written so far
8145
  @param total_size: The total size of data to be written
8146
  @return: The remaining time in seconds
8147

8148
  """
8149
  avg_time = time_taken / float(written)
8150
  return (total_size - written) * avg_time
8151

    
8152

    
8153
def _WipeDisks(lu, instance):
8154
  """Wipes instance disks.
8155

8156
  @type lu: L{LogicalUnit}
8157
  @param lu: the logical unit on whose behalf we execute
8158
  @type instance: L{objects.Instance}
8159
  @param instance: the instance whose disks we should create
8160
  @return: the success of the wipe
8161

8162
  """
8163
  node = instance.primary_node
8164

    
8165
  for device in instance.disks:
8166
    lu.cfg.SetDiskID(device, node)
8167

    
8168
  logging.info("Pause sync of instance %s disks", instance.name)
8169
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
8170

    
8171
  for idx, success in enumerate(result.payload):
8172
    if not success:
8173
      logging.warn("pause-sync of instance %s for disks %d failed",
8174
                   instance.name, idx)
8175

    
8176
  try:
8177
    for idx, device in enumerate(instance.disks):
8178
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
8179
      # MAX_WIPE_CHUNK at max
8180
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
8181
                            constants.MIN_WIPE_CHUNK_PERCENT)
8182
      # we _must_ make this an int, otherwise rounding errors will
8183
      # occur
8184
      wipe_chunk_size = int(wipe_chunk_size)
8185

    
8186
      lu.LogInfo("* Wiping disk %d", idx)
8187
      logging.info("Wiping disk %d for instance %s, node %s using"
8188
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
8189

    
8190
      offset = 0
8191
      size = device.size
8192
      last_output = 0
8193
      start_time = time.time()
8194

    
8195
      while offset < size:
8196
        wipe_size = min(wipe_chunk_size, size - offset)
8197
        logging.debug("Wiping disk %d, offset %s, chunk %s",
8198
                      idx, offset, wipe_size)
8199
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
8200
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
8201
                     (idx, offset, wipe_size))
8202
        now = time.time()
8203
        offset += wipe_size
8204
        if now - last_output >= 60:
8205
          eta = _CalcEta(now - start_time, offset, size)
8206
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
8207
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
8208
          last_output = now
8209
  finally:
8210
    logging.info("Resume sync of instance %s disks", instance.name)
8211

    
8212
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
8213

    
8214
    for idx, success in enumerate(result.payload):
8215
      if not success:
8216
        lu.LogWarning("Resume sync of disk %d failed, please have a"
8217
                      " look at the status and troubleshoot the issue", idx)
8218
        logging.warn("resume-sync of instance %s for disks %d failed",
8219
                     instance.name, idx)
8220

    
8221

    
8222
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
8223
  """Create all disks for an instance.
8224

8225
  This abstracts away some work from AddInstance.
8226

8227
  @type lu: L{LogicalUnit}
8228
  @param lu: the logical unit on whose behalf we execute
8229
  @type instance: L{objects.Instance}
8230
  @param instance: the instance whose disks we should create
8231
  @type to_skip: list
8232
  @param to_skip: list of indices to skip
8233
  @type target_node: string
8234
  @param target_node: if passed, overrides the target node for creation
8235
  @rtype: boolean
8236
  @return: the success of the creation
8237

8238
  """
8239
  info = _GetInstanceInfoText(instance)
8240
  if target_node is None:
8241
    pnode = instance.primary_node
8242
    all_nodes = instance.all_nodes
8243
  else:
8244
    pnode = target_node
8245
    all_nodes = [pnode]
8246

    
8247
  if instance.disk_template in constants.DTS_FILEBASED:
8248
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
8249
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
8250

    
8251
    result.Raise("Failed to create directory '%s' on"
8252
                 " node %s" % (file_storage_dir, pnode))
8253

    
8254
  # Note: this needs to be kept in sync with adding of disks in
8255
  # LUInstanceSetParams
8256
  for idx, device in enumerate(instance.disks):
8257
    if to_skip and idx in to_skip:
8258
      continue
8259
    logging.info("Creating volume %s for instance %s",
8260
                 device.iv_name, instance.name)
8261
    #HARDCODE
8262
    for node in all_nodes:
8263
      f_create = node == pnode
8264
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
8265

    
8266

    
8267


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

    # if this is a DRBD disk, return its port to the pool
    if device.dev_type in constants.LDS_DRBD:
      tcp_port = device.logical_id[2]
      lu.cfg.AddTcpUdpPort(tcp_port)

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + \
        disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
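
# Illustration for _ComputeDiskSizePerVG() above (hypothetical values, not
# part of the original module): sizes are accumulated per volume group, and
# DT_DRBD8 adds DRBD_META_SIZE (128) for each disk.
#
#   disks = [{constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 1024},
#            {constants.IDISK_VG: "xenvg", constants.IDISK_SIZE: 2048}]
#
#   _ComputeDiskSizePerVG(constants.DT_PLAIN, disks) == {"xenvg": 3072}
#   _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) == {"xenvg": 3328}
#   _ComputeDiskSizePerVG(constants.DT_FILE, disks)  == {}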


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
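
# Illustration for _ComputeDiskSize() above (hypothetical values, not part
# of the original module): unlike the per-VG variant, a single total is
# returned (None or 0 for the templates without LVM sizing requirements).
#
#   disks = [{constants.IDISK_SIZE: 1024}, {constants.IDISK_SIZE: 2048}]
#
#   _ComputeDiskSize(constants.DT_PLAIN, disks)    == 3072
#   _ComputeDiskSize(constants.DT_DRBD8, disks)    == 3328
#   _ComputeDiskSize(constants.DT_DISKLESS, disks) is None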


def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]
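
# Illustration for _FilterVmNodes() above (hypothetical node names, not
# part of the original module): with "node2" marked non-vm_capable in the
# cluster configuration,
#
#   _FilterVmNodes(lu, ["node1", "node2", "node3"]) == ["node1", "node3"]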


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)

  cluster = lu.cfg.GetClusterInfo()
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)

  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
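
# Illustration for _CheckHVParams() above (hypothetical parameter values,
# not part of the original module): objects.FillDict() merges the
# cluster-level hypervisor defaults with the per-instance overrides before
# validation, the override winning on conflicts.
#
#   cluster.hvparams["xen-pvm"] == {"kernel_path": "/boot/vmlinuz-3-xenU",
#                                   "root_path": "/dev/xvda1"}
#   hvparams                    == {"root_path": "/dev/vda1"}
#
#   hvfull == {"kernel_path": "/boot/vmlinuz-3-xenU",
#              "root_path": "/dev/vda1"}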


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(nodenames, required, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
8452
  """Create an instance.
8453

8454
  """
8455
  HPATH = "instance-add"
8456
  HTYPE = constants.HTYPE_INSTANCE
8457
  REQ_BGL = False
8458

    
8459
  def CheckArguments(self):
8460
    """Check arguments.
8461

8462
    """
8463
    # do not require name_check to ease forward/backward compatibility
8464
    # for tools
8465
    if self.op.no_install and self.op.start:
8466
      self.LogInfo("No-installation mode selected, disabling startup")
8467
      self.op.start = False
8468
    # validate/normalize the instance name
8469
    self.op.instance_name = \
8470
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8471

    
8472
    if self.op.ip_check and not self.op.name_check:
8473
      # TODO: make the ip check more flexible and not depend on the name check
8474
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8475
                                 " check", errors.ECODE_INVAL)
8476

    
8477
    # check nics' parameter names
8478
    for nic in self.op.nics:
8479
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8480

    
8481
    # check disks: parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

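    # Illustration of the adopt/no-adopt rule above (hypothetical disk
    # specifications, not part of the original module):
    #
    #   valid:   [{size: 1024, adopt: "lv_a"}, {size: 2048, adopt: "lv_b"}]
    #   valid:   [{size: 1024}, {size: 2048}]
    #   invalid: [{size: 1024, adopt: "lv_a"}, {size: 2048}]
    #            -> OpPrereqError("Either all disks are adopted or none is")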
    self.adopt_disks = has_adopt
8512

    
8513
    # instance name verification
8514
    if self.op.name_check:
8515
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8516
      self.op.instance_name = self.hostname1.name
8517
      # used in CheckPrereq for ip ping check
8518
      self.check_ip = self.hostname1.ip
8519
    else:
8520
      self.check_ip = None
8521

    
8522
    # file storage checks
8523
    if (self.op.file_driver and
8524
        not self.op.file_driver in constants.FILE_DRIVER):
8525
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8526
                                 self.op.file_driver, errors.ECODE_INVAL)
8527

    
8528
    if self.op.disk_template == constants.DT_FILE:
8529
      opcodes.RequireFileStorage()
8530
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8531
      opcodes.RequireSharedFileStorage()
8532

    
8533
    ### Node/iallocator related checks
8534
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8535

    
8536
    if self.op.pnode is not None:
8537
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8538
        if self.op.snode is None:
8539
          raise errors.OpPrereqError("The networked disk templates need"
8540
                                     " a mirror node", errors.ECODE_INVAL)
8541
      elif self.op.snode:
8542
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8543
                        " template")
8544
        self.op.snode = None
8545

    
8546
    self._cds = _GetClusterDomainSecret()
8547

    
8548
    if self.op.mode == constants.INSTANCE_IMPORT:
8549
      # On import force_variant must be True, because if we forced it at
8550
      # initial install, our only chance when importing it back is that it
8551
      # works again!
8552
      self.op.force_variant = True
8553

    
8554
      if self.op.no_install:
8555
        self.LogInfo("No-installation mode has no effect during import")
8556

    
8557
    elif self.op.mode == constants.INSTANCE_CREATE:
8558
      if self.op.os_type is None:
8559
        raise errors.OpPrereqError("No guest OS specified",
8560
                                   errors.ECODE_INVAL)
8561
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8562
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8563
                                   " installation" % self.op.os_type,
8564
                                   errors.ECODE_STATE)
8565
      if self.op.disk_template is None:
8566
        raise errors.OpPrereqError("No disk template specified",
8567
                                   errors.ECODE_INVAL)
8568

    
8569
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8570
      # Check handshake to ensure both clusters have the same domain secret
8571
      src_handshake = self.op.source_handshake
8572
      if not src_handshake:
8573
        raise errors.OpPrereqError("Missing source handshake",
8574
                                   errors.ECODE_INVAL)
8575

    
8576
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8577
                                                           src_handshake)
8578
      if errmsg:
8579
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8580
                                   errors.ECODE_INVAL)
8581

    
8582
      # Load and check source CA
8583
      self.source_x509_ca_pem = self.op.source_x509_ca
8584
      if not self.source_x509_ca_pem:
8585
        raise errors.OpPrereqError("Missing source X509 CA",
8586
                                   errors.ECODE_INVAL)
8587

    
8588
      try:
8589
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8590
                                                    self._cds)
8591
      except OpenSSL.crypto.Error, err:
8592
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8593
                                   (err, ), errors.ECODE_INVAL)
8594

    
8595
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8596
      if errcode is not None:
8597
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8598
                                   errors.ECODE_INVAL)
8599

    
8600
      self.source_x509_ca = cert
8601

    
8602
      src_instance_name = self.op.source_instance_name
8603
      if not src_instance_name:
8604
        raise errors.OpPrereqError("Missing source instance name",
8605
                                   errors.ECODE_INVAL)
8606

    
8607
      self.source_instance_name = \
8608
          netutils.GetHostname(name=src_instance_name).name
8609

    
8610
    else:
8611
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8612
                                 self.op.mode, errors.ECODE_INVAL)
8613

    
8614
  def ExpandNames(self):
8615
    """ExpandNames for CreateInstance.
8616

8617
    Figure out the right locks for instance creation.
8618

8619
    """
8620
    self.needed_locks = {}
8621

    
8622
    instance_name = self.op.instance_name
8623
    # this is just a preventive check, but someone might still add this
8624
    # instance in the meantime, and creation will fail at lock-add time
8625
    if instance_name in self.cfg.GetInstanceList():
8626
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8627
                                 instance_name, errors.ECODE_EXISTS)
8628

    
8629
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8630

    
8631
    if self.op.iallocator:
8632
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8633
      # specifying a group on instance creation and then selecting nodes from
8634
      # that group
8635
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8636
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8637
    else:
8638
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8639
      nodelist = [self.op.pnode]
8640
      if self.op.snode is not None:
8641
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8642
        nodelist.append(self.op.snode)
8643
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8644
      # Lock resources of instance's primary and secondary nodes (copy to
8645
      # prevent accidental modification)
8646
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8647

    
8648
    # in case of import lock the source node too
8649
    if self.op.mode == constants.INSTANCE_IMPORT:
8650
      src_node = self.op.src_node
8651
      src_path = self.op.src_path
8652

    
8653
      if src_path is None:
8654
        self.op.src_path = src_path = self.op.instance_name
8655

    
8656
      if src_node is None:
8657
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8658
        self.op.src_node = None
8659
        if os.path.isabs(src_path):
8660
          raise errors.OpPrereqError("Importing an instance from a path"
8661
                                     " requires a source node option",
8662
                                     errors.ECODE_INVAL)
8663
      else:
8664
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8665
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8666
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8667
        if not os.path.isabs(src_path):
8668
          self.op.src_path = src_path = \
8669
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8670

    
8671
  def _RunAllocator(self):
8672
    """Run the allocator based on input opcode.
8673

8674
    """
8675
    nics = [n.ToDict() for n in self.nics]
8676
    ial = IAllocator(self.cfg, self.rpc,
8677
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8678
                     name=self.op.instance_name,
8679
                     disk_template=self.op.disk_template,
8680
                     tags=self.op.tags,
8681
                     os=self.op.os_type,
8682
                     vcpus=self.be_full[constants.BE_VCPUS],
8683
                     memory=self.be_full[constants.BE_MAXMEM],
8684
                     disks=self.disks,
8685
                     nics=nics,
8686
                     hypervisor=self.op.hypervisor,
8687
                     )
8688

    
8689
    ial.Run(self.op.iallocator)
8690

    
8691
    if not ial.success:
8692
      raise errors.OpPrereqError("Can't compute nodes using"
8693
                                 " iallocator '%s': %s" %
8694
                                 (self.op.iallocator, ial.info),
8695
                                 errors.ECODE_NORES)
8696
    if len(ial.result) != ial.required_nodes:
8697
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8698
                                 " of nodes (%s), required %s" %
8699
                                 (self.op.iallocator, len(ial.result),
8700
                                  ial.required_nodes), errors.ECODE_FAULT)
8701
    self.op.pnode = ial.result[0]
8702
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8703
                 self.op.instance_name, self.op.iallocator,
8704
                 utils.CommaJoin(ial.result))
8705
    if ial.required_nodes == 2:
8706
      self.op.snode = ial.result[1]
8707

    
8708
  def BuildHooksEnv(self):
8709
    """Build hooks env.
8710

8711
    This runs on master, primary and secondary nodes of the instance.
8712

8713
    """
8714
    env = {
8715
      "ADD_MODE": self.op.mode,
8716
      }
8717
    if self.op.mode == constants.INSTANCE_IMPORT:
8718
      env["SRC_NODE"] = self.op.src_node
8719
      env["SRC_PATH"] = self.op.src_path
8720
      env["SRC_IMAGES"] = self.src_images
8721

    
8722
    env.update(_BuildInstanceHookEnv(
8723
      name=self.op.instance_name,
8724
      primary_node=self.op.pnode,
8725
      secondary_nodes=self.secondaries,
8726
      status=self.op.start,
8727
      os_type=self.op.os_type,
8728
      minmem=self.be_full[constants.BE_MINMEM],
8729
      maxmem=self.be_full[constants.BE_MAXMEM],
8730
      vcpus=self.be_full[constants.BE_VCPUS],
8731
      nics=_NICListToTuple(self, self.nics),
8732
      disk_template=self.op.disk_template,
8733
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8734
             for d in self.disks],
8735
      bep=self.be_full,
8736
      hvp=self.hv_full,
8737
      hypervisor_name=self.op.hypervisor,
8738
      tags=self.op.tags,
8739
    ))
8740

    
8741
    return env
8742

    
8743
  def BuildHooksNodes(self):
8744
    """Build hooks nodes.
8745

8746
    """
8747
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8748
    return nl, nl
8749

    
8750
  def _ReadExportInfo(self):
8751
    """Reads the export information from disk.
8752

8753
    It will override the opcode source node and path with the actual
8754
    information, if these two were not specified before.
8755

8756
    @return: the export information
8757

8758
    """
8759
    assert self.op.mode == constants.INSTANCE_IMPORT
8760

    
8761
    src_node = self.op.src_node
8762
    src_path = self.op.src_path
8763

    
8764
    if src_node is None:
8765
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8766
      exp_list = self.rpc.call_export_list(locked_nodes)
8767
      found = False
8768
      for node in exp_list:
8769
        if exp_list[node].fail_msg:
8770
          continue
8771
        if src_path in exp_list[node].payload:
8772
          found = True
8773
          self.op.src_node = src_node = node
8774
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8775
                                                       src_path)
8776
          break
8777
      if not found:
8778
        raise errors.OpPrereqError("No export found for relative path %s" %
8779
                                    src_path, errors.ECODE_INVAL)
8780

    
8781
    _CheckNodeOnline(self, src_node)
8782
    result = self.rpc.call_export_info(src_node, src_path)
8783
    result.Raise("No export or invalid export found in dir %s" % src_path)
8784

    
8785
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8786
    if not export_info.has_section(constants.INISECT_EXP):
8787
      raise errors.ProgrammerError("Corrupted export config",
8788
                                   errors.ECODE_ENVIRON)
8789

    
8790
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8791
    if (int(ei_version) != constants.EXPORT_VERSION):
8792
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8793
                                 (ei_version, constants.EXPORT_VERSION),
8794
                                 errors.ECODE_ENVIRON)
8795
    return export_info
8796

    
8797
  def _ReadExportParams(self, einfo):
8798
    """Use export parameters as defaults.
8799

8800
    In case the opcode doesn't specify (as in override) some instance
8801
    parameters, then try to use them from the export information, if
8802
    that declares them.
8803

8804
    """
8805
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8806

    
8807
    if self.op.disk_template is None:
8808
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8809
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8810
                                          "disk_template")
8811
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8812
          raise errors.OpPrereqError("Disk template specified in configuration"
8813
                                     " file is not one of the allowed values:"
8814
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8815
      else:
8816
        raise errors.OpPrereqError("No disk template specified and the export"
8817
                                   " is missing the disk_template information",
8818
                                   errors.ECODE_INVAL)
8819

    
8820
    if not self.op.disks:
8821
      disks = []
8822
      # TODO: import the disk iv_name too
8823
      for idx in range(constants.MAX_DISKS):
8824
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8825
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8826
          disks.append({constants.IDISK_SIZE: disk_sz})
8827
      self.op.disks = disks
8828
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8829
        raise errors.OpPrereqError("No disk info specified and the export"
8830
                                   " is missing the disk information",
8831
                                   errors.ECODE_INVAL)
8832

    
8833
    if not self.op.nics:
8834
      nics = []
8835
      for idx in range(constants.MAX_NICS):
8836
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8837
          ndict = {}
8838
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8839
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8840
            ndict[name] = v
8841
          nics.append(ndict)
8842
        else:
8843
          break
8844
      self.op.nics = nics
8845

    
8846
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8847
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8848

    
8849
    if (self.op.hypervisor is None and
8850
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8851
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8852

    
8853
    if einfo.has_section(constants.INISECT_HYP):
8854
      # use the export parameters but do not override the ones
8855
      # specified by the user
8856
      for name, value in einfo.items(constants.INISECT_HYP):
8857
        if name not in self.op.hvparams:
8858
          self.op.hvparams[name] = value
8859

    
8860
    if einfo.has_section(constants.INISECT_BEP):
8861
      # use the parameters, without overriding
8862
      for name, value in einfo.items(constants.INISECT_BEP):
8863
        if name not in self.op.beparams:
8864
          self.op.beparams[name] = value
8865
        # Compatibility for the old "memory" be param
8866
        if name == constants.BE_MEMORY:
8867
          if constants.BE_MAXMEM not in self.op.beparams:
8868
            self.op.beparams[constants.BE_MAXMEM] = value
8869
          if constants.BE_MINMEM not in self.op.beparams:
8870
            self.op.beparams[constants.BE_MINMEM] = value
8871
    else:
8872
      # try to read the parameters old style, from the main section
8873
      for name in constants.BES_PARAMETERS:
8874
        if (name not in self.op.beparams and
8875
            einfo.has_option(constants.INISECT_INS, name)):
8876
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8877

    
8878
    if einfo.has_section(constants.INISECT_OSP):
8879
      # use the parameters, without overriding
8880
      for name, value in einfo.items(constants.INISECT_OSP):
8881
        if name not in self.op.osparams:
8882
          self.op.osparams[name] = value
8883

    
8884
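
  # Illustration for _ReadExportParams() above (hypothetical export options,
  # not part of the original module): options from the instance section of
  # the export info only fill in what the opcode left unspecified, e.g. a
  # "disk0_size = 10240" option becomes
  #
  #   self.op.disks == [{constants.IDISK_SIZE: 10240}]
  #
  # and the "nic0_mac"/"nic0_mode"/"nic0_link"/"nic0_ip" options are
  # collected into the corresponding entry of self.op.nics.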

  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]
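
  # Illustration for _RevertToDefaults() above (hypothetical values, not
  # part of the original module): parameters that merely repeat the current
  # cluster defaults are dropped from the opcode, so the new instance keeps
  # tracking the cluster defaults instead of pinning today's values.
  #
  #   cluster default beparams: {"maxmem": 128, "vcpus": 1, ...}
  #   self.op.beparams before:  {"maxmem": 128, "vcpus": 4}
  #   self.op.beparams after:   {"vcpus": 4}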

  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
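
  # Illustration for _CalculateFileStorageDir() above (hypothetical paths,
  # not part of the original module): with a cluster file storage dir of
  # "/srv/ganeti/file-storage", file_storage_dir="relocated" in the opcode
  # and an instance named "inst1.example.com", the result is
  #
  #   /srv/ganeti/file-storage/relocated/inst1.example.com
  #
  # utils.PathJoin is used instead of os.path.join so that the extra
  # components cannot escape the configured storage root.
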
  def CheckPrereq(self):
8939
    """Check prerequisites.
8940

8941
    """
8942
    self._CalculateFileStorageDir()
8943

    
8944
    if self.op.mode == constants.INSTANCE_IMPORT:
8945
      export_info = self._ReadExportInfo()
8946
      self._ReadExportParams(export_info)
8947

    
8948
    if (not self.cfg.GetVGName() and
8949
        self.op.disk_template not in constants.DTS_NOT_LVM):
8950
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8951
                                 " instances", errors.ECODE_STATE)
8952

    
8953
    if (self.op.hypervisor is None or
8954
        self.op.hypervisor == constants.VALUE_AUTO):
8955
      self.op.hypervisor = self.cfg.GetHypervisorType()
8956

    
8957
    cluster = self.cfg.GetClusterInfo()
8958
    enabled_hvs = cluster.enabled_hypervisors
8959
    if self.op.hypervisor not in enabled_hvs:
8960
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8961
                                 " cluster (%s)" % (self.op.hypervisor,
8962
                                  ",".join(enabled_hvs)),
8963
                                 errors.ECODE_STATE)
8964

    
8965
    # Check tag validity
8966
    for tag in self.op.tags:
8967
      objects.TaggableObject.ValidateTag(tag)
8968

    
8969
    # check hypervisor parameter syntax (locally)
8970
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8971
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8972
                                      self.op.hvparams)
8973
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8974
    hv_type.CheckParameterSyntax(filled_hvp)
8975
    self.hv_full = filled_hvp
8976
    # check that we don't specify global parameters on an instance
8977
    _CheckGlobalHvParams(self.op.hvparams)
8978

    
8979
    # fill and remember the beparams dict
8980
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8981
    for param, value in self.op.beparams.iteritems():
8982
      if value == constants.VALUE_AUTO:
8983
        self.op.beparams[param] = default_beparams[param]
8984
    objects.UpgradeBeParams(self.op.beparams)
8985
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8986
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8987

    
8988
    # build os parameters
8989
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8990

    
8991
    # now that hvp/bep are in final format, let's reset to defaults,
8992
    # if told to do so
8993
    if self.op.identify_defaults:
8994
      self._RevertToDefaults(cluster)
8995

    
8996
    # NIC buildup
8997
    self.nics = []
8998
    for idx, nic in enumerate(self.op.nics):
8999
      nic_mode_req = nic.get(constants.INIC_MODE, None)
9000
      nic_mode = nic_mode_req
9001
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
9002
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
9003

    
9004
      # in routed mode, for the first nic, the default ip is 'auto'
9005
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
9006
        default_ip_mode = constants.VALUE_AUTO
9007
      else:
9008
        default_ip_mode = constants.VALUE_NONE
9009

    
9010
      # ip validity checks
9011
      ip = nic.get(constants.INIC_IP, default_ip_mode)
9012
      if ip is None or ip.lower() == constants.VALUE_NONE:
9013
        nic_ip = None
9014
      elif ip.lower() == constants.VALUE_AUTO:
9015
        if not self.op.name_check:
9016
          raise errors.OpPrereqError("IP address set to auto but name checks"
9017
                                     " have been skipped",
9018
                                     errors.ECODE_INVAL)
9019
        nic_ip = self.hostname1.ip
9020
      else:
9021
        if not netutils.IPAddress.IsValid(ip):
9022
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
9023
                                     errors.ECODE_INVAL)
9024
        nic_ip = ip
9025

    
9026
      # TODO: check the ip address for uniqueness
9027
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
9028
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
9029
                                   errors.ECODE_INVAL)
9030

    
9031
      # MAC address verification
9032
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
9033
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9034
        mac = utils.NormalizeAndValidateMac(mac)
9035

    
9036
        try:
9037
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
9038
        except errors.ReservationError:
9039
          raise errors.OpPrereqError("MAC address %s already in use"
9040
                                     " in cluster" % mac,
9041
                                     errors.ECODE_NOTUNIQUE)
9042

    
9043
      #  Build nic parameters
9044
      link = nic.get(constants.INIC_LINK, None)
9045
      if link == constants.VALUE_AUTO:
9046
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
9047
      nicparams = {}
9048
      if nic_mode_req:
9049
        nicparams[constants.NIC_MODE] = nic_mode
9050
      if link:
9051
        nicparams[constants.NIC_LINK] = link
9052

    
9053
      check_params = cluster.SimpleFillNIC(nicparams)
9054
      objects.NIC.CheckParameterSyntax(check_params)
9055
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
9056

    
9057
    # disk checks/pre-build
9058
    default_vg = self.cfg.GetVGName()
9059
    self.disks = []
9060
    for disk in self.op.disks:
9061
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
9062
      if mode not in constants.DISK_ACCESS_SET:
9063
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
9064
                                   mode, errors.ECODE_INVAL)
9065
      size = disk.get(constants.IDISK_SIZE, None)
9066
      if size is None:
9067
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
9068
      try:
9069
        size = int(size)
9070
      except (TypeError, ValueError):
9071
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
9072
                                   errors.ECODE_INVAL)
9073

    
9074
      data_vg = disk.get(constants.IDISK_VG, default_vg)
9075
      new_disk = {
9076
        constants.IDISK_SIZE: size,
9077
        constants.IDISK_MODE: mode,
9078
        constants.IDISK_VG: data_vg,
9079
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9080
        }
9081
      if constants.IDISK_ADOPT in disk:
9082
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9083
      self.disks.append(new_disk)
9084

    
9085
    if self.op.mode == constants.INSTANCE_IMPORT:
9086
      disk_images = []
9087
      for idx in range(len(self.disks)):
9088
        option = "disk%d_dump" % idx
9089
        if export_info.has_option(constants.INISECT_INS, option):
9090
          # FIXME: are the old os-es, disk sizes, etc. useful?
9091
          export_name = export_info.get(constants.INISECT_INS, option)
9092
          image = utils.PathJoin(self.op.src_path, export_name)
9093
          disk_images.append(image)
9094
        else:
9095
          disk_images.append(False)
9096

    
9097
      self.src_images = disk_images
9098

    
9099
      old_name = export_info.get(constants.INISECT_INS, "name")
9100
      if self.op.instance_name == old_name:
9101
        for idx, nic in enumerate(self.nics):
9102
          if nic.mac == constants.VALUE_AUTO:
9103
            nic_mac_ini = "nic%d_mac" % idx
9104
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9105

    
9106
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9107

    
9108
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
9109
    if self.op.ip_check:
9110
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9111
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
9112
                                   (self.check_ip, self.op.instance_name),
9113
                                   errors.ECODE_NOTUNIQUE)
9114

    
9115
    #### mac address generation
9116
    # By generating here the mac address both the allocator and the hooks get
9117
    # the real final mac address rather than the 'auto' or 'generate' value.
9118
    # There is a race condition between the generation and the instance object
9119
    # creation, which means that we know the mac is valid now, but we're not
9120
    # sure it will be when we actually add the instance. If things go bad
9121
    # adding the instance will abort because of a duplicate mac, and the
9122
    # creation job will fail.
9123
    for nic in self.nics:
9124
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9125
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9126

    
9127
    #### allocator run
9128

    
9129
    if self.op.iallocator is not None:
9130
      self._RunAllocator()
9131

    
9132
    # Release all unneeded node locks
9133
    _ReleaseLocks(self, locking.LEVEL_NODE,
9134
                  keep=filter(None, [self.op.pnode, self.op.snode,
9135
                                     self.op.src_node]))
9136

    
9137
    #### node related checks
9138

    
9139
    # check primary node
9140
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9141
    assert self.pnode is not None, \
9142
      "Cannot retrieve locked node %s" % self.op.pnode
9143
    if pnode.offline:
9144
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9145
                                 pnode.name, errors.ECODE_STATE)
9146
    if pnode.drained:
9147
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9148
                                 pnode.name, errors.ECODE_STATE)
9149
    if not pnode.vm_capable:
9150
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9151
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9152

    
9153
    self.secondaries = []
9154

    
9155
    # mirror node verification
9156
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9157
      if self.op.snode == pnode.name:
9158
        raise errors.OpPrereqError("The secondary node cannot be the"
9159
                                   " primary node", errors.ECODE_INVAL)
9160
      _CheckNodeOnline(self, self.op.snode)
9161
      _CheckNodeNotDrained(self, self.op.snode)
9162
      _CheckNodeVmCapable(self, self.op.snode)
9163
      self.secondaries.append(self.op.snode)
9164

    
9165
    nodenames = [pnode.name] + self.secondaries
9166

    
9167
    if not self.adopt_disks:
9168
      # Check lv size requirements, if not adopting
9169
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9170
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9171

    
9172
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9173
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9174
                                disk[constants.IDISK_ADOPT])
9175
                     for disk in self.disks])
9176
      if len(all_lvs) != len(self.disks):
9177
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9178
                                   errors.ECODE_INVAL)
9179
      for lv_name in all_lvs:
9180
        try:
9181
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9182
          # to ReserveLV uses the same syntax
9183
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9184
        except errors.ReservationError:
9185
          raise errors.OpPrereqError("LV named %s used by another instance" %
9186
                                     lv_name, errors.ECODE_NOTUNIQUE)
9187

    
9188
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9189
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9190

    
9191
      node_lvs = self.rpc.call_lv_list([pnode.name],
9192
                                       vg_names.payload.keys())[pnode.name]
9193
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9194
      node_lvs = node_lvs.payload
9195

    
9196
      delta = all_lvs.difference(node_lvs.keys())
9197
      if delta:
9198
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9199
                                   utils.CommaJoin(delta),
9200
                                   errors.ECODE_INVAL)
9201
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9202
      if online_lvs:
9203
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9204
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9205
                                   errors.ECODE_STATE)
9206
      # update the size of disk based on what is found
9207
      for dsk in self.disks:
9208
        dsk[constants.IDISK_SIZE] = \
9209
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9210
                                        dsk[constants.IDISK_ADOPT])][0]))
9211

    
9212
    elif self.op.disk_template == constants.DT_BLOCK:
9213
      # Normalize and de-duplicate device paths
9214
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9215
                       for disk in self.disks])
9216
      if len(all_disks) != len(self.disks):
9217
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9218
                                   errors.ECODE_INVAL)
9219
      baddisks = [d for d in all_disks
9220
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9221
      if baddisks:
9222
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9223
                                   " cannot be adopted" %
9224
                                   (", ".join(baddisks),
9225
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9226
                                   errors.ECODE_INVAL)
9227

    
9228
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9229
                                            list(all_disks))[pnode.name]
9230
      node_disks.Raise("Cannot get block device information from node %s" %
9231
                       pnode.name)
9232
      node_disks = node_disks.payload
9233
      delta = all_disks.difference(node_disks.keys())
9234
      if delta:
9235
        raise errors.OpPrereqError("Missing block device(s): %s" %
9236
                                   utils.CommaJoin(delta),
9237
                                   errors.ECODE_INVAL)
9238
      for dsk in self.disks:
9239
        dsk[constants.IDISK_SIZE] = \
9240
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9241

    
9242
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9243

    
9244
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9245
    # check OS parameters (remotely)
9246
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9247

    
9248
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9249

    
9250
    # memory check on primary node
9251
    #TODO(dynmem): use MINMEM for checking
9252
    if self.op.start:
9253
      _CheckNodeFreeMemory(self, self.pnode.name,
9254
                           "creating instance %s" % self.op.instance_name,
9255
                           self.be_full[constants.BE_MAXMEM],
9256
                           self.op.hypervisor)
9257

    
9258
    self.dry_run_result = list(nodenames)
9259

    
9260
  def Exec(self, feedback_fn):
9261
    """Create and add the instance to the cluster.
9262

9263
    """
9264
    instance = self.op.instance_name
9265
    pnode_name = self.pnode.name
9266

    
9267
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9268
                self.owned_locks(locking.LEVEL_NODE)), \
9269
      "Node locks differ from node resource locks"
9270

    
9271
    ht_kind = self.op.hypervisor
9272
    if ht_kind in constants.HTS_REQ_PORT:
9273
      network_port = self.cfg.AllocatePort()
9274
    else:
9275
      network_port = None
9276

    
9277
    disks = _GenerateDiskTemplate(self,
9278
                                  self.op.disk_template,
9279
                                  instance, pnode_name,
9280
                                  self.secondaries,
9281
                                  self.disks,
9282
                                  self.instance_file_storage_dir,
9283
                                  self.op.file_driver,
9284
                                  0,
9285
                                  feedback_fn)
9286

    
9287
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9288
                            primary_node=pnode_name,
9289
                            nics=self.nics, disks=disks,
9290
                            disk_template=self.op.disk_template,
9291
                            admin_state=constants.ADMINST_DOWN,
9292
                            network_port=network_port,
9293
                            beparams=self.op.beparams,
9294
                            hvparams=self.op.hvparams,
9295
                            hypervisor=self.op.hypervisor,
9296
                            osparams=self.op.osparams,
9297
                            )
9298

    
9299
    if self.op.tags:
9300
      for tag in self.op.tags:
9301
        iobj.AddTag(tag)
9302

    
9303
    if self.adopt_disks:
9304
      if self.op.disk_template == constants.DT_PLAIN:
9305
        # rename LVs to the newly-generated names; we need to construct
9306
        # 'fake' LV disks with the old data, plus the new unique_id
9307
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9308
        rename_to = []
9309
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9310
          rename_to.append(t_dsk.logical_id)
9311
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9312
          self.cfg.SetDiskID(t_dsk, pnode_name)
9313
        result = self.rpc.call_blockdev_rename(pnode_name,
9314
                                               zip(tmp_disks, rename_to))
9315
        result.Raise("Failed to rename adoped LVs")
9316
    else:
9317
      feedback_fn("* creating instance disks...")
9318
      try:
9319
        _CreateDisks(self, iobj)
9320
      except errors.OpExecError:
9321
        self.LogWarning("Device creation failed, reverting...")
9322
        try:
9323
          _RemoveDisks(self, iobj)
9324
        finally:
9325
          self.cfg.ReleaseDRBDMinors(instance)
9326
          raise
9327

    
9328
    feedback_fn("adding instance %s to cluster config" % instance)
9329

    
9330
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9331

    
9332
    # Declare that we don't want to remove the instance lock anymore, as we've
9333
    # added the instance to the config
9334
    del self.remove_locks[locking.LEVEL_INSTANCE]
9335

    
9336
    if self.op.mode == constants.INSTANCE_IMPORT:
9337
      # Release unused nodes
9338
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9339
    else:
9340
      # Release all nodes
9341
      _ReleaseLocks(self, locking.LEVEL_NODE)
9342

    
9343
    disk_abort = False
9344
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9345
      feedback_fn("* wiping instance disks...")
9346
      try:
9347
        _WipeDisks(self, iobj)
9348
      except errors.OpExecError, err:
9349
        logging.exception("Wiping disks failed")
9350
        self.LogWarning("Wiping instance disks failed (%s)", err)
9351
        disk_abort = True
9352

    
9353
    if disk_abort:
9354
      # Something is already wrong with the disks, don't do anything else
9355
      pass
9356
    elif self.op.wait_for_sync:
9357
      disk_abort = not _WaitForSync(self, iobj)
9358
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9359
      # make sure the disks are not degraded (still sync-ing is ok)
9360
      feedback_fn("* checking mirrors status")
9361
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9362
    else:
9363
      disk_abort = False
9364

    
9365
    if disk_abort:
9366
      _RemoveDisks(self, iobj)
9367
      self.cfg.RemoveInstance(iobj.name)
9368
      # Make sure the instance lock gets removed
9369
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9370
      raise errors.OpExecError("There are some degraded disks for"
9371
                               " this instance")
9372

    
9373
    # Release all node resource locks
9374
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9375

    
9376
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9377
      if self.op.mode == constants.INSTANCE_CREATE:
9378
        if not self.op.no_install:
9379
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9380
                        not self.op.wait_for_sync)
9381
          if pause_sync:
9382
            feedback_fn("* pausing disk sync to install instance OS")
9383
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9384
                                                              iobj.disks, True)
9385
            for idx, success in enumerate(result.payload):
9386
              if not success:
9387
                logging.warn("pause-sync of instance %s for disk %d failed",
9388
                             instance, idx)
9389

    
9390
          feedback_fn("* running the instance OS create scripts...")
9391
          # FIXME: pass debug option from opcode to backend
9392
          os_add_result = \
9393
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9394
                                          self.op.debug_level)
9395
          if pause_sync:
9396
            feedback_fn("* resuming disk sync")
9397
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9398
                                                              iobj.disks, False)
9399
            for idx, success in enumerate(result.payload):
9400
              if not success:
9401
                logging.warn("resume-sync of instance %s for disk %d failed",
9402
                             instance, idx)
9403

    
9404
          os_add_result.Raise("Could not add os for instance %s"
9405
                              " on node %s" % (instance, pnode_name))
9406

    
9407
      elif self.op.mode == constants.INSTANCE_IMPORT:
9408
        feedback_fn("* running the instance OS import scripts...")
9409

    
9410
        transfers = []
9411

    
9412
        for idx, image in enumerate(self.src_images):
9413
          if not image:
9414
            continue
9415

    
9416
          # FIXME: pass debug option from opcode to backend
9417
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9418
                                             constants.IEIO_FILE, (image, ),
9419
                                             constants.IEIO_SCRIPT,
9420
                                             (iobj.disks[idx], idx),
9421
                                             None)
9422
          transfers.append(dt)
9423

    
9424
        import_result = \
9425
          masterd.instance.TransferInstanceData(self, feedback_fn,
9426
                                                self.op.src_node, pnode_name,
9427
                                                self.pnode.secondary_ip,
9428
                                                iobj, transfers)
9429
        if not compat.all(import_result):
9430
          self.LogWarning("Some disks for instance %s on node %s were not"
9431
                          " imported successfully" % (instance, pnode_name))
9432

    
9433
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9434
        feedback_fn("* preparing remote import...")
9435
        # The source cluster will stop the instance before attempting to make a
9436
        # connection. In some cases stopping an instance can take a long time,
9437
        # hence the shutdown timeout is added to the connection timeout.
9438
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9439
                           self.op.source_shutdown_timeout)
9440
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9441

    
9442
        assert iobj.primary_node == self.pnode.name
9443
        disk_results = \
9444
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9445
                                        self.source_x509_ca,
9446
                                        self._cds, timeouts)
9447
        if not compat.all(disk_results):
9448
          # TODO: Should the instance still be started, even if some disks
9449
          # failed to import (valid for local imports, too)?
9450
          self.LogWarning("Some disks for instance %s on node %s were not"
9451
                          " imported successfully" % (instance, pnode_name))
9452

    
9453
        # Run rename script on newly imported instance
9454
        assert iobj.name == instance
9455
        feedback_fn("Running rename script for %s" % instance)
9456
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9457
                                                   self.source_instance_name,
9458
                                                   self.op.debug_level)
9459
        if result.fail_msg:
9460
          self.LogWarning("Failed to run rename script for %s on node"
9461
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9462

    
9463
      else:
9464
        # also checked in the prereq part
9465
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9466
                                     % self.op.mode)
9467

    
9468
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
9469

    
9470
    if self.op.start:
9471
      iobj.admin_state = constants.ADMINST_UP
9472
      self.cfg.Update(iobj, feedback_fn)
9473
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9474
      feedback_fn("* starting instance...")
9475
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9476
                                            False)
9477
      result.Raise("Could not start instance")
9478

    
9479
    return list(iobj.all_nodes)
9480

    
9481

    
9482
class LUInstanceConsole(NoHooksLU):
9483
  """Connect to an instance's console.
9484

9485
  This is somewhat special in that it returns the command line that
9486
  you need to run on the master node in order to connect to the
9487
  console.
9488

9489
  """
9490
  REQ_BGL = False
9491

    
9492
  def ExpandNames(self):
9493
    self.share_locks = _ShareAll()
9494
    self._ExpandAndLockInstance()
9495

    
9496
  def CheckPrereq(self):
9497
    """Check prerequisites.
9498

9499
    This checks that the instance is in the cluster.
9500

9501
    """
9502
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9503
    assert self.instance is not None, \
9504
      "Cannot retrieve locked instance %s" % self.op.instance_name
9505
    _CheckNodeOnline(self, self.instance.primary_node)
9506

    
9507
  def Exec(self, feedback_fn):
9508
    """Connect to the console of an instance
9509

9510
    """
9511
    instance = self.instance
9512
    node = instance.primary_node
9513

    
9514
    node_insts = self.rpc.call_instance_list([node],
9515
                                             [instance.hypervisor])[node]
9516
    node_insts.Raise("Can't get node information from %s" % node)
9517

    
9518
    if instance.name not in node_insts.payload:
9519
      if instance.admin_state == constants.ADMINST_UP:
9520
        state = constants.INSTST_ERRORDOWN
9521
      elif instance.admin_state == constants.ADMINST_DOWN:
9522
        state = constants.INSTST_ADMINDOWN
9523
      else:
9524
        state = constants.INSTST_ADMINOFFLINE
9525
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9526
                               (instance.name, state))
9527

    
9528
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9529

    
9530
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9531

    
9532

    
9533
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()
9552

    
9553

    
9554
class LUInstanceReplaceDisks(LogicalUnit):
9555
  """Replace the disks of an instance.
9556

9557
  """
9558
  HPATH = "mirrors-replace"
9559
  HTYPE = constants.HTYPE_INSTANCE
9560
  REQ_BGL = False
9561

    
9562
  def CheckArguments(self):
9563
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9564
                                  self.op.iallocator)
9565

    
9566
  def ExpandNames(self):
9567
    self._ExpandAndLockInstance()
9568

    
9569
    assert locking.LEVEL_NODE not in self.needed_locks
9570
    assert locking.LEVEL_NODE_RES not in self.needed_locks
9571
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9572

    
9573
    assert self.op.iallocator is None or self.op.remote_node is None, \
9574
      "Conflicting options"
9575

    
9576
    if self.op.remote_node is not None:
9577
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9578

    
9579
      # Warning: do not remove the locking of the new secondary here
9580
      # unless DRBD8.AddChildren is changed to work in parallel;
9581
      # currently it doesn't since parallel invocations of
9582
      # FindUnusedMinor will conflict
9583
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9584
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9585
    else:
9586
      self.needed_locks[locking.LEVEL_NODE] = []
9587
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9588

    
9589
      if self.op.iallocator is not None:
9590
        # iallocator will select a new node in the same group
9591
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9592

    
9593
    self.needed_locks[locking.LEVEL_NODE_RES] = []
9594

    
9595
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9596
                                   self.op.iallocator, self.op.remote_node,
9597
                                   self.op.disks, False, self.op.early_release)
9598

    
9599
    self.tasklets = [self.replacer]
9600

    
9601
  def DeclareLocks(self, level):
9602
    if level == locking.LEVEL_NODEGROUP:
9603
      assert self.op.remote_node is None
9604
      assert self.op.iallocator is not None
9605
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9606

    
9607
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9608
      # Lock all groups used by instance optimistically; this requires going
9609
      # via the node before it's locked, requiring verification later on
9610
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9611
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9612

    
9613
    elif level == locking.LEVEL_NODE:
9614
      if self.op.iallocator is not None:
9615
        assert self.op.remote_node is None
9616
        assert not self.needed_locks[locking.LEVEL_NODE]
9617

    
9618
        # Lock member nodes of all locked groups
9619
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9620
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9621
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9622
      else:
9623
        self._LockInstancesNodes()
9624
    elif level == locking.LEVEL_NODE_RES:
9625
      # Reuse node locks
9626
      self.needed_locks[locking.LEVEL_NODE_RES] = \
9627
        self.needed_locks[locking.LEVEL_NODE]
9628

    
9629
  def BuildHooksEnv(self):
9630
    """Build hooks env.
9631

9632
    This runs on the master, the primary and all the secondaries.
9633

9634
    """
9635
    instance = self.replacer.instance
9636
    env = {
9637
      "MODE": self.op.mode,
9638
      "NEW_SECONDARY": self.op.remote_node,
9639
      "OLD_SECONDARY": instance.secondary_nodes[0],
9640
      }
9641
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9642
    return env
9643

    
9644
  def BuildHooksNodes(self):
9645
    """Build hooks nodes.
9646

9647
    """
9648
    instance = self.replacer.instance
9649
    nl = [
9650
      self.cfg.GetMasterNode(),
9651
      instance.primary_node,
9652
      ]
9653
    if self.op.remote_node is not None:
9654
      nl.append(self.op.remote_node)
9655
    return nl, nl
9656

    
9657
  def CheckPrereq(self):
9658
    """Check prerequisites.
9659

9660
    """
9661
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9662
            self.op.iallocator is None)
9663

    
9664
    # Verify if node group locks are still correct
9665
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9666
    if owned_groups:
9667
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9668

    
9669
    return LogicalUnit.CheckPrereq(self)
9670

    
9671

    
9672
class TLReplaceDisks(Tasklet):
9673
  """Replaces disks for an instance.
9674

9675
  Note: Locking is not within the scope of this class.
9676

9677
  """
9678
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9679
               disks, delay_iallocator, early_release):
9680
    """Initializes this class.
9681

9682
    """
9683
    Tasklet.__init__(self, lu)
9684

    
9685
    # Parameters
9686
    self.instance_name = instance_name
9687
    self.mode = mode
9688
    self.iallocator_name = iallocator_name
9689
    self.remote_node = remote_node
9690
    self.disks = disks
9691
    self.delay_iallocator = delay_iallocator
9692
    self.early_release = early_release
9693

    
9694
    # Runtime data
9695
    self.instance = None
9696
    self.new_node = None
9697
    self.target_node = None
9698
    self.other_node = None
9699
    self.remote_node_info = None
9700
    self.node_secondary_ip = None
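    # (target_node is the node whose storage is being replaced - for a
    # secondary change this is the old secondary - other_node is its peer,
    # and new_node is only set when a new secondary has been selected)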
9701

    
9702
  @staticmethod
9703
  def CheckArguments(mode, remote_node, iallocator):
9704
    """Helper function for users of this class.
9705

9706
    """
9707
    # check for valid parameter combination
9708
    if mode == constants.REPLACE_DISK_CHG:
9709
      if remote_node is None and iallocator is None:
9710
        raise errors.OpPrereqError("When changing the secondary either an"
9711
                                   " iallocator script must be used or the"
9712
                                   " new node given", errors.ECODE_INVAL)
9713

    
9714
      if remote_node is not None and iallocator is not None:
9715
        raise errors.OpPrereqError("Give either the iallocator or the new"
9716
                                   " secondary, not both", errors.ECODE_INVAL)
9717

    
9718
    elif remote_node is not None or iallocator is not None:
9719
      # Not replacing the secondary
9720
      raise errors.OpPrereqError("The iallocator and new node options can"
9721
                                 " only be used when changing the"
9722
                                 " secondary node", errors.ECODE_INVAL)
9723

    
9724
  @staticmethod
9725
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9726
    """Compute a new secondary node using an IAllocator.
9727

9728
    """
9729
    ial = IAllocator(lu.cfg, lu.rpc,
9730
                     mode=constants.IALLOCATOR_MODE_RELOC,
9731
                     name=instance_name,
9732
                     relocate_from=list(relocate_from))
9733

    
9734
    ial.Run(iallocator_name)
9735

    
9736
    if not ial.success:
9737
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9738
                                 " %s" % (iallocator_name, ial.info),
9739
                                 errors.ECODE_NORES)
9740

    
9741
    if len(ial.result) != ial.required_nodes:
9742
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9743
                                 " of nodes (%s), required %s" %
9744
                                 (iallocator_name,
9745
                                  len(ial.result), ial.required_nodes),
9746
                                 errors.ECODE_FAULT)
9747

    
9748
    remote_node_name = ial.result[0]
9749

    
9750
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9751
               instance_name, remote_node_name)
9752

    
9753
    return remote_node_name
9754

    
9755
  def _FindFaultyDisks(self, node_name):
9756
    """Wrapper for L{_FindFaultyInstanceDisks}.
9757

9758
    """
9759
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9760
                                    node_name, True)
9761

    
9762
  def _CheckDisksActivated(self, instance):
9763
    """Checks if the instance disks are activated.
9764

9765
    @param instance: The instance to check disks
9766
    @return: True if they are activated, False otherwise
9767

9768
    """
9769
    nodes = instance.all_nodes
9770

    
9771
    for idx, dev in enumerate(instance.disks):
9772
      for node in nodes:
9773
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9774
        self.cfg.SetDiskID(dev, node)
9775

    
9776
        result = self.rpc.call_blockdev_find(node, dev)
9777

    
9778
        if result.offline:
9779
          continue
9780
        elif result.fail_msg or not result.payload:
9781
          return False
9782

    
9783
    return True
9784

    
9785
  def CheckPrereq(self):
9786
    """Check prerequisites.
9787

9788
    This checks that the instance is in the cluster.
9789

9790
    """
9791
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9792
    assert instance is not None, \
9793
      "Cannot retrieve locked instance %s" % self.instance_name
9794

    
9795
    if instance.disk_template != constants.DT_DRBD8:
9796
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9797
                                 " instances", errors.ECODE_INVAL)
9798

    
9799
    if len(instance.secondary_nodes) != 1:
9800
      raise errors.OpPrereqError("The instance has a strange layout,"
9801
                                 " expected one secondary but found %d" %
9802
                                 len(instance.secondary_nodes),
9803
                                 errors.ECODE_FAULT)
9804

    
9805
    if not self.delay_iallocator:
9806
      self._CheckPrereq2()
9807

    
9808
  def _CheckPrereq2(self):
9809
    """Check prerequisites, second part.
9810

9811
    This function should conceptually be part of CheckPrereq. It was separated
    and is called from Exec instead because, during node evacuation, the
    iallocator would otherwise only be run against an unmodified cluster model
    that does not take planned changes into account.
9815

9816
    """
9817
    instance = self.instance
9818
    secondary_node = instance.secondary_nodes[0]
9819

    
9820
    if self.iallocator_name is None:
9821
      remote_node = self.remote_node
9822
    else:
9823
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9824
                                       instance.name, instance.secondary_nodes)
9825

    
9826
    if remote_node is None:
9827
      self.remote_node_info = None
9828
    else:
9829
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9830
             "Remote node '%s' is not locked" % remote_node
9831

    
9832
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9833
      assert self.remote_node_info is not None, \
9834
        "Cannot retrieve locked node %s" % remote_node
9835

    
9836
    if remote_node == self.instance.primary_node:
9837
      raise errors.OpPrereqError("The specified node is the primary node of"
9838
                                 " the instance", errors.ECODE_INVAL)
9839

    
9840
    if remote_node == secondary_node:
9841
      raise errors.OpPrereqError("The specified node is already the"
9842
                                 " secondary node of the instance",
9843
                                 errors.ECODE_INVAL)
9844

    
9845
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9846
                                    constants.REPLACE_DISK_CHG):
9847
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9848
                                 errors.ECODE_INVAL)
9849

    
9850
    if self.mode == constants.REPLACE_DISK_AUTO:
9851
      if not self._CheckDisksActivated(instance):
9852
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9853
                                   " first" % self.instance_name,
9854
                                   errors.ECODE_STATE)
9855
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9856
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9857

    
9858
      if faulty_primary and faulty_secondary:
9859
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9860
                                   " one node and can not be repaired"
9861
                                   " automatically" % self.instance_name,
9862
                                   errors.ECODE_STATE)
9863

    
9864
      if faulty_primary:
9865
        self.disks = faulty_primary
9866
        self.target_node = instance.primary_node
9867
        self.other_node = secondary_node
9868
        check_nodes = [self.target_node, self.other_node]
9869
      elif faulty_secondary:
9870
        self.disks = faulty_secondary
9871
        self.target_node = secondary_node
9872
        self.other_node = instance.primary_node
9873
        check_nodes = [self.target_node, self.other_node]
9874
      else:
9875
        self.disks = []
9876
        check_nodes = []
9877

    
9878
    else:
9879
      # Non-automatic modes
9880
      if self.mode == constants.REPLACE_DISK_PRI:
9881
        self.target_node = instance.primary_node
9882
        self.other_node = secondary_node
9883
        check_nodes = [self.target_node, self.other_node]
9884

    
9885
      elif self.mode == constants.REPLACE_DISK_SEC:
9886
        self.target_node = secondary_node
9887
        self.other_node = instance.primary_node
9888
        check_nodes = [self.target_node, self.other_node]
9889

    
9890
      elif self.mode == constants.REPLACE_DISK_CHG:
9891
        self.new_node = remote_node
9892
        self.other_node = instance.primary_node
9893
        self.target_node = secondary_node
9894
        check_nodes = [self.new_node, self.other_node]
9895

    
9896
        _CheckNodeNotDrained(self.lu, remote_node)
9897
        _CheckNodeVmCapable(self.lu, remote_node)
9898

    
9899
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9900
        assert old_node_info is not None
9901
        if old_node_info.offline and not self.early_release:
9902
          # doesn't make sense to delay the release
9903
          self.early_release = True
9904
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9905
                          " early-release mode", secondary_node)
9906

    
9907
      else:
9908
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9909
                                     self.mode)
9910

    
9911
      # If not specified all disks should be replaced
9912
      if not self.disks:
9913
        self.disks = range(len(self.instance.disks))
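        # e.g. an instance with three disks yields disks [0, 1, 2]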
9914

    
9915
    for node in check_nodes:
9916
      _CheckNodeOnline(self.lu, node)
9917

    
9918
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9919
                                                          self.other_node,
9920
                                                          self.target_node]
9921
                              if node_name is not None)
9922

    
9923
    # Release unneeded node and node resource locks
9924
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9925
    _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES, keep=touched_nodes)
9926

    
9927
    # Release any owned node group
9928
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9929
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9930

    
9931
    # Check whether disks are valid
9932
    for disk_idx in self.disks:
9933
      instance.FindDisk(disk_idx)
9934

    
9935
    # Get secondary node IP addresses
9936
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9937
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9938

    
9939
  def Exec(self, feedback_fn):
9940
    """Execute disk replacement.
9941

9942
    This dispatches the disk replacement to the appropriate handler.
9943

9944
    """
9945
    if self.delay_iallocator:
9946
      self._CheckPrereq2()
9947

    
9948
    if __debug__:
9949
      # Verify owned locks before starting operation
9950
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9951
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9952
          ("Incorrect node locks, owning %s, expected %s" %
9953
           (owned_nodes, self.node_secondary_ip.keys()))
9954
      assert (self.lu.owned_locks(locking.LEVEL_NODE) ==
9955
              self.lu.owned_locks(locking.LEVEL_NODE_RES))
9956

    
9957
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9958
      assert list(owned_instances) == [self.instance_name], \
9959
          "Instance '%s' not locked" % self.instance_name
9960

    
9961
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9962
          "Should not own any node group lock at this point"
9963

    
9964
    if not self.disks:
9965
      feedback_fn("No disks need replacement")
9966
      return
9967

    
9968
    feedback_fn("Replacing disk(s) %s for %s" %
9969
                (utils.CommaJoin(self.disks), self.instance.name))
9970

    
9971
    activate_disks = (self.instance.admin_state != constants.ADMINST_UP)
9972

    
9973
    # Activate the instance disks if we're replacing them on a down instance
9974
    if activate_disks:
9975
      _StartInstanceDisks(self.lu, self.instance, True)
9976

    
9977
    try:
9978
      # Should we replace the secondary node?
9979
      if self.new_node is not None:
9980
        fn = self._ExecDrbd8Secondary
9981
      else:
9982
        fn = self._ExecDrbd8DiskOnly
9983

    
9984
      result = fn(feedback_fn)
9985
    finally:
9986
      # Deactivate the instance disks if we're replacing them on a
9987
      # down instance
9988
      if activate_disks:
9989
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9990

    
9991
    assert not self.lu.owned_locks(locking.LEVEL_NODE)
9992

    
9993
    if __debug__:
9994
      # Verify owned locks
9995
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE_RES)
9996
      nodes = frozenset(self.node_secondary_ip)
9997
      assert ((self.early_release and not owned_nodes) or
9998
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9999
        ("Not owning the correct locks, early_release=%s, owned=%r,"
10000
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
10001

    
10002
    return result
10003

    
10004
  def _CheckVolumeGroup(self, nodes):
10005
    self.lu.LogInfo("Checking volume groups")
10006

    
10007
    vgname = self.cfg.GetVGName()
10008

    
10009
    # Make sure volume group exists on all involved nodes
10010
    results = self.rpc.call_vg_list(nodes)
10011
    if not results:
10012
      raise errors.OpExecError("Can't list volume groups on the nodes")
10013

    
10014
    for node in nodes:
10015
      res = results[node]
10016
      res.Raise("Error checking node %s" % node)
10017
      if vgname not in res.payload:
10018
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
10019
                                 (vgname, node))
10020

    
10021
  def _CheckDisksExistence(self, nodes):
10022
    # Check disk existence
10023
    for idx, dev in enumerate(self.instance.disks):
10024
      if idx not in self.disks:
10025
        continue
10026

    
10027
      for node in nodes:
10028
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
10029
        self.cfg.SetDiskID(dev, node)
10030

    
10031
        result = self.rpc.call_blockdev_find(node, dev)
10032

    
10033
        msg = result.fail_msg
10034
        if msg or not result.payload:
10035
          if not msg:
10036
            msg = "disk not found"
10037
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
10038
                                   (idx, node, msg))
10039

    
10040
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
10041
    for idx, dev in enumerate(self.instance.disks):
10042
      if idx not in self.disks:
10043
        continue
10044

    
10045
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
10046
                      (idx, node_name))
10047

    
10048
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
10049
                                   ldisk=ldisk):
10050
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
10051
                                 " replace disks for instance %s" %
10052
                                 (node_name, self.instance.name))
10053

    
10054
  def _CreateNewStorage(self, node_name):
10055
    """Create new storage on the primary or secondary node.
10056

10057
    This is only used for same-node replaces, not for changing the
10058
    secondary node, hence we don't want to modify the existing disk.
10059

10060
    """
10061
    iv_names = {}
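    # Maps each disk's iv_name (e.g. "disk/0") to a tuple of (drbd device,
    # old LV children, newly created LVs) for the detach/rename/attach and
    # cleanup steps that follow.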
10062

    
10063
    for idx, dev in enumerate(self.instance.disks):
10064
      if idx not in self.disks:
10065
        continue
10066

    
10067
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
10068

    
10069
      self.cfg.SetDiskID(dev, node_name)
10070

    
10071
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
10072
      names = _GenerateUniqueNames(self.lu, lv_names)
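      # e.g. for disk 0 the requested suffixes are ".disk0_data" and
      # ".disk0_meta"; _GenerateUniqueNames prefixes each with a freshly
      # generated unique ID to obtain the new LV names.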
10073

    
10074
      vg_data = dev.children[0].logical_id[0]
10075
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
10076
                             logical_id=(vg_data, names[0]))
10077
      vg_meta = dev.children[1].logical_id[0]
10078
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
10079
                             logical_id=(vg_meta, names[1]))
10080

    
10081
      new_lvs = [lv_data, lv_meta]
10082
      old_lvs = [child.Copy() for child in dev.children]
10083
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
10084

    
10085
      # we pass force_create=True to force the LVM creation
10086
      for new_lv in new_lvs:
10087
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
10088
                        _GetInstanceInfoText(self.instance), False)
10089

    
10090
    return iv_names
10091

    
10092
  def _CheckDevices(self, node_name, iv_names):
10093
    for name, (dev, _, _) in iv_names.iteritems():
10094
      self.cfg.SetDiskID(dev, node_name)
10095

    
10096
      result = self.rpc.call_blockdev_find(node_name, dev)
10097

    
10098
      msg = result.fail_msg
10099
      if msg or not result.payload:
10100
        if not msg:
10101
          msg = "disk not found"
10102
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
10103
                                 (name, msg))
10104

    
10105
      if result.payload.is_degraded:
10106
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
10107

    
10108
  def _RemoveOldStorage(self, node_name, iv_names):
10109
    for name, (_, old_lvs, _) in iv_names.iteritems():
10110
      self.lu.LogInfo("Remove logical volumes for %s" % name)
10111

    
10112
      for lv in old_lvs:
10113
        self.cfg.SetDiskID(lv, node_name)
10114

    
10115
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10116
        if msg:
10117
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
10118
                             hint="remove unused LVs manually")
10119

    
10120
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
10121
    """Replace a disk on the primary or secondary for DRBD 8.
10122

10123
    The algorithm for replace is quite complicated:
10124

10125
      1. for each disk to be replaced:
10126

10127
        1. create new LVs on the target node with unique names
10128
        1. detach old LVs from the drbd device
10129
        1. rename old LVs to name_replaced.<time_t>
10130
        1. rename new LVs to old LVs
10131
        1. attach the new LVs (with the old names now) to the drbd device
10132

10133
      1. wait for sync across all devices
10134

10135
      1. for each modified disk:
10136

10137
        1. remove old LVs (which have the name name_replaces.<time_t>)
10138

10139
    Failures are not very well handled.
10140

10141
    """
10142
    steps_total = 6
10143

    
10144
    # Step: check device activation
10145
    self.lu.LogStep(1, steps_total, "Check device existence")
10146
    self._CheckDisksExistence([self.other_node, self.target_node])
10147
    self._CheckVolumeGroup([self.target_node, self.other_node])
10148

    
10149
    # Step: check other node consistency
10150
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10151
    self._CheckDisksConsistency(self.other_node,
10152
                                self.other_node == self.instance.primary_node,
10153
                                False)
10154

    
10155
    # Step: create new storage
10156
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10157
    iv_names = self._CreateNewStorage(self.target_node)
10158

    
10159
    # Step: for each lv, detach+rename*2+attach
10160
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10161
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10162
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10163

    
10164
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10165
                                                     old_lvs)
10166
      result.Raise("Can't detach drbd from local storage on node"
10167
                   " %s for device %s" % (self.target_node, dev.iv_name))
10168
      #dev.children = []
10169
      #cfg.Update(instance)
10170

    
10171
      # ok, we created the new LVs, so now we know we have the needed
10172
      # storage; as such, we proceed on the target node to rename
10173
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10174
      # using the assumption that logical_id == physical_id (which in
10175
      # turn is the unique_id on that node)
10176

    
10177
      # FIXME(iustin): use a better name for the replaced LVs
10178
      temp_suffix = int(time.time())
10179
      ren_fn = lambda d, suff: (d.physical_id[0],
10180
                                d.physical_id[1] + "_replaced-%s" % suff)
10181

    
10182
      # Build the rename list based on what LVs exist on the node
10183
      rename_old_to_new = []
10184
      for to_ren in old_lvs:
10185
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10186
        if not result.fail_msg and result.payload:
10187
          # device exists
10188
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10189

    
10190
      self.lu.LogInfo("Renaming the old LVs on the target node")
10191
      result = self.rpc.call_blockdev_rename(self.target_node,
10192
                                             rename_old_to_new)
10193
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10194

    
10195
      # Now we rename the new LVs to the old LVs
10196
      self.lu.LogInfo("Renaming the new LVs on the target node")
10197
      rename_new_to_old = [(new, old.physical_id)
10198
                           for old, new in zip(old_lvs, new_lvs)]
10199
      result = self.rpc.call_blockdev_rename(self.target_node,
10200
                                             rename_new_to_old)
10201
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10202

    
10203
      # Intermediate steps of in memory modifications
10204
      for old, new in zip(old_lvs, new_lvs):
10205
        new.logical_id = old.logical_id
10206
        self.cfg.SetDiskID(new, self.target_node)
10207

    
10208
      # We need to modify old_lvs so that removal later removes the
10209
      # right LVs, not the newly added ones; note that old_lvs is a
10210
      # copy here
10211
      for disk in old_lvs:
10212
        disk.logical_id = ren_fn(disk, temp_suffix)
10213
        self.cfg.SetDiskID(disk, self.target_node)
10214

    
10215
      # Now that the new lvs have the old name, we can add them to the device
10216
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10217
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10218
                                                  new_lvs)
10219
      msg = result.fail_msg
10220
      if msg:
10221
        for new_lv in new_lvs:
10222
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10223
                                               new_lv).fail_msg
10224
          if msg2:
10225
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10226
                               hint=("cleanup manually the unused logical"
10227
                                     "volumes"))
10228
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10229

    
10230
    cstep = itertools.count(5)
10231

    
10232
    if self.early_release:
10233
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10234
      self._RemoveOldStorage(self.target_node, iv_names)
10235
      # TODO: Check if releasing locks early still makes sense
10236
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10237
    else:
10238
      # Release all resource locks except those used by the instance
10239
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10240
                    keep=self.node_secondary_ip.keys())
10241

    
10242
    # Release all node locks while waiting for sync
10243
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10244

    
10245
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10246
    # shutdown in the caller into consideration.
10247

    
10248
    # Wait for sync
10249
    # This can fail as the old devices are degraded and _WaitForSync
10250
    # does a combined result over all disks, so we don't check its return value
10251
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10252
    _WaitForSync(self.lu, self.instance)
10253

    
10254
    # Check all devices manually
10255
    self._CheckDevices(self.instance.primary_node, iv_names)
10256

    
10257
    # Step: remove old storage
10258
    if not self.early_release:
10259
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10260
      self._RemoveOldStorage(self.target_node, iv_names)
10261

    
10262
  def _ExecDrbd8Secondary(self, feedback_fn):
10263
    """Replace the secondary node for DRBD 8.
10264

10265
    The algorithm for replace is quite complicated:
10266
      - for all disks of the instance:
10267
        - create new LVs on the new node with same names
10268
        - shutdown the drbd device on the old secondary
10269
        - disconnect the drbd network on the primary
10270
        - create the drbd device on the new secondary
10271
        - network attach the drbd on the primary, using an artifice:
10272
          the drbd code for Attach() will connect to the network if it
10273
          finds a device which is connected to the good local disks but
10274
          not network enabled
10275
      - wait for sync across all devices
10276
      - remove all disks from the old secondary
10277

10278
    Failures are not very well handled.
10279

10280
    """
10281
    steps_total = 6
10282

    
10283
    pnode = self.instance.primary_node
10284

    
10285
    # Step: check device activation
10286
    self.lu.LogStep(1, steps_total, "Check device existence")
10287
    self._CheckDisksExistence([self.instance.primary_node])
10288
    self._CheckVolumeGroup([self.instance.primary_node])
10289

    
10290
    # Step: check other node consistency
10291
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10292
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10293

    
10294
    # Step: create new storage
10295
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10296
    for idx, dev in enumerate(self.instance.disks):
10297
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10298
                      (self.new_node, idx))
10299
      # we pass force_create=True to force LVM creation
10300
      for new_lv in dev.children:
10301
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10302
                        _GetInstanceInfoText(self.instance), False)
10303

    
10304
    # Step 4: drbd minors and drbd setup changes
10305
    # after this, we must manually remove the drbd minors on both the
10306
    # error and the success paths
10307
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10308
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10309
                                         for dev in self.instance.disks],
10310
                                        self.instance.name)
10311
    logging.debug("Allocated minors %r", minors)
10312

    
10313
    iv_names = {}
10314
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10315
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10316
                      (self.new_node, idx))
10317
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the second activation later in this step
10321
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10322
      if self.instance.primary_node == o_node1:
10323
        p_minor = o_minor1
10324
      else:
10325
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10326
        p_minor = o_minor2
10327

    
10328
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10329
                      p_minor, new_minor, o_secret)
10330
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10331
                    p_minor, new_minor, o_secret)
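      # Both IDs follow the DRBD8 logical_id layout unpacked above
      # (node_a, node_b, port, minor_a, minor_b, secret); the "alone" variant
      # leaves the port unset so the device initially comes up standalone.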
10332

    
10333
      iv_names[idx] = (dev, dev.children, new_net_id)
10334
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10335
                    new_net_id)
10336
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10337
                              logical_id=new_alone_id,
10338
                              children=dev.children,
10339
                              size=dev.size)
10340
      try:
10341
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10342
                              _GetInstanceInfoText(self.instance), False)
10343
      except errors.GenericError:
10344
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10345
        raise
10346

    
10347
    # We have new devices, shutdown the drbd on the old secondary
10348
    for idx, dev in enumerate(self.instance.disks):
10349
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10350
      self.cfg.SetDiskID(dev, self.target_node)
10351
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10352
      if msg:
10353
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10354
                           "node: %s" % (idx, msg),
10355
                           hint=("Please cleanup this device manually as"
10356
                                 " soon as possible"))
10357

    
10358
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10359
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10360
                                               self.instance.disks)[pnode]
10361

    
10362
    msg = result.fail_msg
10363
    if msg:
10364
      # detaches didn't succeed (unlikely)
10365
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10366
      raise errors.OpExecError("Can't detach the disks from the network on"
10367
                               " old node: %s" % (msg,))
10368

    
10369
    # if we managed to detach at least one, we update all the disks of
10370
    # the instance to point to the new secondary
10371
    self.lu.LogInfo("Updating instance configuration")
10372
    for dev, _, new_logical_id in iv_names.itervalues():
10373
      dev.logical_id = new_logical_id
10374
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10375

    
10376
    self.cfg.Update(self.instance, feedback_fn)
10377

    
10378
    # Release all node locks (the configuration has been updated)
10379
    _ReleaseLocks(self.lu, locking.LEVEL_NODE)
10380

    
10381
    # and now perform the drbd attach
10382
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10383
                    " (standalone => connected)")
10384
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10385
                                            self.new_node],
10386
                                           self.node_secondary_ip,
10387
                                           self.instance.disks,
10388
                                           self.instance.name,
10389
                                           False)
10390
    for to_node, to_result in result.items():
10391
      msg = to_result.fail_msg
10392
      if msg:
10393
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10394
                           to_node, msg,
10395
                           hint=("please do a gnt-instance info to see the"
10396
                                 " status of disks"))
10397

    
10398
    cstep = itertools.count(5)
10399

    
10400
    if self.early_release:
10401
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10402
      self._RemoveOldStorage(self.target_node, iv_names)
10403
      # TODO: Check if releasing locks early still makes sense
10404
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES)
10405
    else:
10406
      # Release all resource locks except those used by the instance
10407
      _ReleaseLocks(self.lu, locking.LEVEL_NODE_RES,
10408
                    keep=self.node_secondary_ip.keys())
10409

    
10410
    # TODO: Can the instance lock be downgraded here? Take the optional disk
10411
    # shutdown in the caller into consideration.
10412

    
10413
    # Wait for sync
10414
    # This can fail as the old devices are degraded and _WaitForSync
10415
    # does a combined result over all disks, so we don't check its return value
10416
    self.lu.LogStep(cstep.next(), steps_total, "Sync devices")
10417
    _WaitForSync(self.lu, self.instance)
10418

    
10419
    # Check all devices manually
10420
    self._CheckDevices(self.instance.primary_node, iv_names)
10421

    
10422
    # Step: remove old storage
10423
    if not self.early_release:
10424
      self.lu.LogStep(cstep.next(), steps_total, "Removing old storage")
10425
      self._RemoveOldStorage(self.target_node, iv_names)
10426

    
10427

    
10428
class LURepairNodeStorage(NoHooksLU):
10429
  """Repairs the volume group on a node.
10430

10431
  """
10432
  REQ_BGL = False
10433

    
10434
  def CheckArguments(self):
10435
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10436

    
10437
    storage_type = self.op.storage_type
10438

    
10439
    if (constants.SO_FIX_CONSISTENCY not in
10440
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10441
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10442
                                 " repaired" % storage_type,
10443
                                 errors.ECODE_INVAL)
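    # Only storage types whose VALID_STORAGE_OPERATIONS entry includes
    # SO_FIX_CONSISTENCY (e.g. LVM volume groups) can be repaired here.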
10444

    
10445
  def ExpandNames(self):
10446
    self.needed_locks = {
10447
      locking.LEVEL_NODE: [self.op.node_name],
10448
      }
10449

    
10450
  def _CheckFaultyDisks(self, instance, node_name):
10451
    """Ensure faulty disks abort the opcode or at least warn."""
10452
    try:
10453
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10454
                                  node_name, True):
10455
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10456
                                   " node '%s'" % (instance.name, node_name),
10457
                                   errors.ECODE_STATE)
10458
    except errors.OpPrereqError, err:
10459
      if self.op.ignore_consistency:
10460
        self.proc.LogWarning(str(err.args[0]))
10461
      else:
10462
        raise
10463

    
10464
  def CheckPrereq(self):
10465
    """Check prerequisites.
10466

10467
    """
10468
    # Check whether any instance on this node has faulty disks
10469
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10470
      if inst.admin_state != constants.ADMINST_UP:
10471
        continue
10472
      check_nodes = set(inst.all_nodes)
10473
      check_nodes.discard(self.op.node_name)
10474
      for inst_node_name in check_nodes:
10475
        self._CheckFaultyDisks(inst, inst_node_name)
10476

    
10477
  def Exec(self, feedback_fn):
10478
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10479
                (self.op.name, self.op.node_name))
10480

    
10481
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10482
    result = self.rpc.call_storage_execute(self.op.node_name,
10483
                                           self.op.storage_type, st_args,
10484
                                           self.op.name,
10485
                                           constants.SO_FIX_CONSISTENCY)
10486
    result.Raise("Failed to repair storage unit '%s' on %s" %
10487
                 (self.op.name, self.op.node_name))
10488

    
10489

    
10490
class LUNodeEvacuate(NoHooksLU):
10491
  """Evacuates instances off a list of nodes.
10492

10493
  """
10494
  REQ_BGL = False
10495

    
10496
  def CheckArguments(self):
10497
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10498

    
10499
  def ExpandNames(self):
10500
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10501

    
10502
    if self.op.remote_node is not None:
10503
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10504
      assert self.op.remote_node
10505

    
10506
      if self.op.remote_node == self.op.node_name:
10507
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10508
                                   " secondary node", errors.ECODE_INVAL)
10509

    
10510
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10511
        raise errors.OpPrereqError("Without the use of an iallocator only"
10512
                                   " secondary instances can be evacuated",
10513
                                   errors.ECODE_INVAL)
10514

    
10515
    # Declare locks
10516
    self.share_locks = _ShareAll()
10517
    self.needed_locks = {
10518
      locking.LEVEL_INSTANCE: [],
10519
      locking.LEVEL_NODEGROUP: [],
10520
      locking.LEVEL_NODE: [],
10521
      }
10522

    
10523
    if self.op.remote_node is None:
10524
      # Iallocator will choose any node(s) in the same group
10525
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10526
    else:
10527
      group_nodes = frozenset([self.op.remote_node])
10528

    
10529
    # Determine nodes to be locked
10530
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10531

    
10532
  def _DetermineInstances(self):
10533
    """Builds list of instances to operate on.
10534

10535
    """
10536
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10537

    
10538
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10539
      # Primary instances only
10540
      inst_fn = _GetNodePrimaryInstances
10541
      assert self.op.remote_node is None, \
10542
        "Evacuating primary instances requires iallocator"
10543
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10544
      # Secondary instances only
10545
      inst_fn = _GetNodeSecondaryInstances
10546
    else:
10547
      # All instances
10548
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10549
      inst_fn = _GetNodeInstances
10550

    
10551
    return inst_fn(self.cfg, self.op.node_name)
10552

    
10553
  def DeclareLocks(self, level):
10554
    if level == locking.LEVEL_INSTANCE:
10555
      # Lock instances optimistically, needs verification once node and group
10556
      # locks have been acquired
10557
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10558
        set(i.name for i in self._DetermineInstances())
10559

    
10560
    elif level == locking.LEVEL_NODEGROUP:
10561
      # Lock node groups optimistically, needs verification once nodes have
10562
      # been acquired
10563
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10564
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10565

    
10566
    elif level == locking.LEVEL_NODE:
10567
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10568

    
10569
  def CheckPrereq(self):
10570
    # Verify locks
10571
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10572
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10573
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10574

    
10575
    assert owned_nodes == self.lock_nodes
10576

    
10577
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10578
    if owned_groups != wanted_groups:
10579
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10580
                               " current groups are '%s', used to be '%s'" %
10581
                               (utils.CommaJoin(wanted_groups),
10582
                                utils.CommaJoin(owned_groups)))
10583

    
10584
    # Determine affected instances
10585
    self.instances = self._DetermineInstances()
10586
    self.instance_names = [i.name for i in self.instances]
10587

    
10588
    if set(self.instance_names) != owned_instances:
10589
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10590
                               " were acquired, current instances are '%s',"
10591
                               " used to be '%s'" %
10592
                               (self.op.node_name,
10593
                                utils.CommaJoin(self.instance_names),
10594
                                utils.CommaJoin(owned_instances)))
10595

    
10596
    if self.instance_names:
10597
      self.LogInfo("Evacuating instances from node '%s': %s",
10598
                   self.op.node_name,
10599
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10600
    else:
10601
      self.LogInfo("No instances to evacuate from node '%s'",
10602
                   self.op.node_name)
10603

    
10604
    if self.op.remote_node is not None:
10605
      for i in self.instances:
10606
        if i.primary_node == self.op.remote_node:
10607
          raise errors.OpPrereqError("Node %s is the primary node of"
10608
                                     " instance %s, cannot use it as"
10609
                                     " secondary" %
10610
                                     (self.op.remote_node, i.name),
10611
                                     errors.ECODE_INVAL)
10612

    
10613
  def Exec(self, feedback_fn):
10614
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10615

    
10616
    if not self.instance_names:
10617
      # No instances to evacuate
10618
      jobs = []
10619

    
10620
    elif self.op.iallocator is not None:
10621
      # TODO: Implement relocation to other group
10622
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10623
                       evac_mode=self.op.mode,
10624
                       instances=list(self.instance_names))
10625

    
10626
      ial.Run(self.op.iallocator)
10627

    
10628
      if not ial.success:
10629
        raise errors.OpPrereqError("Can't compute node evacuation using"
10630
                                   " iallocator '%s': %s" %
10631
                                   (self.op.iallocator, ial.info),
10632
                                   errors.ECODE_NORES)
10633

    
10634
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10635

    
10636
    elif self.op.remote_node is not None:
10637
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10638
      jobs = [
10639
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10640
                                        remote_node=self.op.remote_node,
10641
                                        disks=[],
10642
                                        mode=constants.REPLACE_DISK_CHG,
10643
                                        early_release=self.op.early_release)]
10644
        for instance_name in self.instance_names
10645
        ]
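      # i.e. one single-opcode job per instance, each replacing that
      # instance's secondary with the requested remote node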
10646

    
10647
    else:
10648
      raise errors.ProgrammerError("No iallocator or remote node")
10649

    
10650
    return ResultWithJobs(jobs)
10651

    
10652

    
10653
def _SetOpEarlyRelease(early_release, op):
  """Sets C{early_release} flag on opcodes if available.

  """
  try:
    op.early_release = early_release
  except AttributeError:
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)

  return op
10663

    
10664

    
10665
def _NodeEvacDest(use_nodes, group, nodes):
  """Returns group or nodes depending on caller's choice.

  """
  if use_nodes:
    return utils.CommaJoin(nodes)
  else:
    return group
10673

    
10674

    
10675
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10676
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10677

10678
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10679
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10680

10681
  @type lu: L{LogicalUnit}
10682
  @param lu: Logical unit instance
10683
  @type alloc_result: tuple/list
10684
  @param alloc_result: Result from iallocator
10685
  @type early_release: bool
10686
  @param early_release: Whether to release locks early if possible
10687
  @type use_nodes: bool
10688
  @param use_nodes: Whether to display node names instead of groups
10689

10690
  """
10691
  (moved, failed, jobs) = alloc_result
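  # "moved" is a list of (instance name, target group, target nodes) tuples,
  # "failed" a list of (instance name, reason) tuples, and "jobs" a list of
  # job definitions, each of which is a list of serialized opcodes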
10692

    
10693
  if failed:
10694
    failreason = utils.CommaJoin("%s (%s)" % (name, reason)
10695
                                 for (name, reason) in failed)
10696
    lu.LogWarning("Unable to evacuate instances %s", failreason)
10697
    raise errors.OpExecError("Unable to evacuate instances %s" % failreason)
10698

    
10699
  if moved:
10700
    lu.LogInfo("Instances to be moved: %s",
10701
               utils.CommaJoin("%s (to %s)" %
10702
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10703
                               for (name, group, nodes) in moved))
10704

    
10705
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10706
              map(opcodes.OpCode.LoadOpCode, ops))
10707
          for ops in jobs]
10708

    
10709

    
10710
class LUInstanceGrowDisk(LogicalUnit):
10711
  """Grow a disk of an instance.
10712

10713
  """
10714
  HPATH = "disk-grow"
10715
  HTYPE = constants.HTYPE_INSTANCE
10716
  REQ_BGL = False
10717

    
10718
  def ExpandNames(self):
10719
    self._ExpandAndLockInstance()
10720
    self.needed_locks[locking.LEVEL_NODE] = []
10721
    self.needed_locks[locking.LEVEL_NODE_RES] = []
10722
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10723

    
10724
  def DeclareLocks(self, level):
10725
    if level == locking.LEVEL_NODE:
10726
      self._LockInstancesNodes()
10727
    elif level == locking.LEVEL_NODE_RES:
10728
      # Copy node locks
10729
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10730
        self.needed_locks[locking.LEVEL_NODE][:]
10731

    
10732
  def BuildHooksEnv(self):
10733
    """Build hooks env.
10734

10735
    This runs on the master, the primary and all the secondaries.
10736

10737
    """
10738
    env = {
10739
      "DISK": self.op.disk,
10740
      "AMOUNT": self.op.amount,
10741
      }
10742
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10743
    return env
10744

    
10745
  def BuildHooksNodes(self):
10746
    """Build hooks nodes.
10747

10748
    """
10749
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10750
    return (nl, nl)
10751

    
10752
  def CheckPrereq(self):
10753
    """Check prerequisites.
10754

10755
    This checks that the instance is in the cluster.
10756

10757
    """
10758
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10759
    assert instance is not None, \
10760
      "Cannot retrieve locked instance %s" % self.op.instance_name
10761
    nodenames = list(instance.all_nodes)
10762
    for node in nodenames:
10763
      _CheckNodeOnline(self, node)
10764

    
10765
    self.instance = instance
10766

    
10767
    if instance.disk_template not in constants.DTS_GROWABLE:
10768
      raise errors.OpPrereqError("Instance's disk layout does not support"
10769
                                 " growing", errors.ECODE_INVAL)
10770

    
10771
    self.disk = instance.FindDisk(self.op.disk)
10772

    
10773
    if instance.disk_template not in (constants.DT_FILE,
10774
                                      constants.DT_SHARED_FILE):
10775
      # TODO: check the free disk space for file-based disks, once that
      # feature is supported
10777
      _CheckNodesFreeDiskPerVG(self, nodenames,
10778
                               self.disk.ComputeGrowth(self.op.amount))
10779

    
10780
  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk

    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
    assert (self.owned_locks(locking.LEVEL_NODE) ==
            self.owned_locks(locking.LEVEL_NODE_RES))

    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block device to grow")

    feedback_fn("Growing disk %s of instance '%s' by %s" %
                (self.op.disk, instance.name,
                 utils.FormatUnit(self.op.amount, "h")))

    # First run all grow ops in dry-run mode
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
      result.Raise("Grow request failed on node %s" % node)

    # We know that (as far as we can test) operations across different
    # nodes will succeed, time to run it for real
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
      result.Raise("Grow request failed on node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

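    # Only once every node has resized the device successfully is the new
    # size recorded in the cluster configuration below; a failure in the
    # loops above therefore leaves the configured size unchanged.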
    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    # Changes have been recorded, release node lock
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # Downgrade lock while waiting for sync
    self.glm.downgrade(locking.LEVEL_INSTANCE)

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
      if disk_abort:
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if instance.admin_state != constants.ADMINST_UP:
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
    elif instance.admin_state != constants.ADMINST_UP:
      self.proc.LogWarning("Not shutting down the disk even though the"
                           " instance is not supposed to be running, because"
                           " wait-for-sync mode was not requested")

    assert self.owned_locks(locking.LEVEL_NODE_RES)
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)


class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      self.share_locks = _ShareAll()

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device.

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

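  # For reference: the tuple returned above is (dev_path, major, minor,
  # sync_percent, estimated_time, is_degraded, ldisk_status).  sync_percent
  # and estimated_time are presumably only meaningful while the device is
  # resynchronising, and ldisk_status describes the state of the local disk.
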
  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # for DRBD, take the secondary node from the device's logical_id
      # (otherwise the one passed in is used)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = map(compat.partial(self._ComputeDiskStatus,
                                        instance, snode),
                         dev.children)
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

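  # Illustrative shape of the per-disk dictionary built above (the concrete
  # values are made up): for a DRBD disk this might look roughly like
  #   {"iv_name": "disk/0", "dev_type": "drbd8", "size": 10240, "mode": "rw",
  #    "pstatus": <tuple from _ComputeBlockdevStatus>, "sstatus": <tuple>,
  #    "children": [<data LV entry>, <metadata LV entry>], ...}
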
  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
                                          for i in self.wanted_instances)
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
      if self.op.static or pnode.offline:
        remote_state = None
        if pnode.offline:
          self.LogWarning("Primary node %s is marked offline, returning static"
                          " information only for instance %s" %
                          (pnode.name, instance.name))
      else:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          if instance.admin_state == constants.ADMINST_UP:
            remote_state = "down"
          else:
            remote_state = instance.admin_state

      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
                  instance.disks)

      result[instance.name] = {
        "name": instance.name,
        "config_state": instance.admin_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result


class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name or
            self.op.online_inst or self.op.offline_inst):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get(constants.IDISK_SIZE, None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict[constants.IDISK_SIZE] = size
      else:
        # modification of disk
        if constants.IDISK_SIZE in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if (self.op.disk_template and
        self.op.disk_template in constants.DTS_INT_MIRROR and
        self.op.remote_node is None):
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                 " one requires specifying a secondary node",
                                 errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get(constants.INIC_IP, None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict[constants.INIC_IP] = None
        else:
          if not netutils.IPAddress.IsValid(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get("bridge", None)
      nic_link = nic_dict.get(constants.INIC_LINK, None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict["bridge"] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict[constants.INIC_LINK] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
        if nic_mac is None:
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO

      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

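  # Illustrative examples of the modification lists validated above (the
  # concrete values are made up): self.op.disks could be
  #   [(constants.DDM_ADD, {"size": 1024, "mode": "rw"})]
  # to add a 1 GiB read-write disk, or [(0, {"mode": "ro"})] to switch disk 0
  # to read-only; self.op.nics follows the same (op, dict) pattern with NIC
  # parameters such as "mac", "ip", "link" or the legacy "bridge" key.
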
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # Can't even acquire node locks in shared mode as upcoming changes in
    # Ganeti 2.6 will start to modify the node object on disk conversion
    self.needed_locks[locking.LEVEL_NODE] = []
    self.needed_locks[locking.LEVEL_NODE_RES] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
      # Copy node locks
      self.needed_locks[locking.LEVEL_NODE_RES] = \
        self.needed_locks[locking.LEVEL_NODE][:]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MINMEM in self.be_new:
      args["minmem"] = self.be_new[constants.BE_MINMEM]
    if constants.BE_MAXMEM in self.be_new:
      args["maxmem"] = self.be_new[constants.BE_MAXMEM]
    if constants.BE_VCPUS in self.be_new:
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args["nics"] = []
      nic_override = dict(self.op.nics)
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if constants.INIC_IP in this_nic_override:
          ip = this_nic_override[constants.INIC_IP]
        else:
          ip = nic.ip
        if constants.INIC_MAC in this_nic_override:
          mac = this_nic_override[constants.INIC_MAC]
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args["nics"].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args["nics"][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)
      instance_os = self.op.os_name
    else:
      instance_os = instance.os

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change disk template")
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.remote_node == pnode:
          raise errors.OpPrereqError("Given new secondary node %s is the same"
                                     " as the primary node of the instance" %
                                     self.op.remote_node, errors.ECODE_STATE)
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        # FIXME: here we assume that the old instance type is DT_PLAIN
        assert instance.disk_template == constants.DT_PLAIN
        disks = [{constants.IDISK_SIZE: d.size,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)

    # hvparams processing
    if self.op.hvparams:
      hv_type = instance.hypervisor
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)

      # local check
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_proposed = self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
                                              instance.hvparams)
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
                                   use_none=True)
      objects.UpgradeBeParams(i_bedict)
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
      be_new = cluster.SimpleFillBE(i_bedict)
      self.be_proposed = self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
    be_old = cluster.FillBE(instance)

    # CPU param validation -- checking every time a parameter is
    # changed to cover all cases where either CPU mask or vcpus have
    # changed
    if (constants.BE_VCPUS in self.be_proposed and
        constants.HV_CPU_MASK in self.hv_proposed):
      cpu_list = \
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
      # Verify mask is consistent with number of vCPUs. Can skip this
      # test if only 1 entry in the CPU mask, which means the same mask
      # is applied to all vCPUs.
      if (len(cpu_list) > 1 and
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
                                   " CPU mask [%s]" %
                                   (self.be_proposed[constants.BE_VCPUS],
                                    self.hv_proposed[constants.HV_CPU_MASK]),
                                   errors.ECODE_INVAL)

      # Only perform this test if a new CPU mask is given
      if constants.HV_CPU_MASK in self.hv_new:
        # Calculate the largest CPU number requested
        max_requested_cpu = max(map(max, cpu_list))
        # Check that all of the instance's nodes have enough physical CPUs to
        # satisfy the requested CPU mask
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
                                max_requested_cpu + 1, instance.hypervisor)

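    # For reference (an assumption about the mask syntax, kept illustrative):
    # a multi-CPU mask such as "0-1:2:3" is expected to parse into one
    # sub-list per vCPU, e.g. [[0, 1], [2], [3]], which is why len(cpu_list)
    # is compared against the proposed number of vCPUs above.
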
    # osparams processing
    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = {}

    self.warn = []

    #TODO(dynmem): do the appropriate check involving MINMEM
    if (constants.BE_MAXMEM in self.op.beparams and not self.op.force and
        be_new[constants.BE_MAXMEM] > be_old[constants.BE_MAXMEM]):
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either auto_balance was just enabled, or it was already on before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload["memory"])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        #TODO(dynmem): do the appropriate check involving MINMEM
        miss_mem = (be_new[constants.BE_MAXMEM] - current_mem -
                    pninfo.payload["memory_free"])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          nres.Raise("Can't get info from secondary node %s" % node,
                     prereq=True, ecode=errors.ECODE_STATE)
          if not isinstance(nres.payload.get("memory_free", None), int):
            raise errors.OpPrereqError("Secondary node %s didn't return free"
                                       " memory information" % node,
                                       errors.ECODE_STATE)
          #TODO(dynmem): do the appropriate check involving MINMEM
          elif be_new[constants.BE_MAXMEM] > nres.payload["memory_free"]:
            raise errors.OpPrereqError("This change will prevent the instance"
                                       " from failover to its secondary node"
                                       " %s, due to not enough memory" % node,
                                       errors.ECODE_STATE)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if "bridge" in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]

      new_nic_params = _GetUpdatedParams(old_nic_params,
                                         update_params_dict)
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.op.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if constants.INIC_IP in nic_dict:
          nic_ip = nic_dict[constants.INIC_IP]
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError("Cannot set the nic ip to None"
                                     " on a routed nic", errors.ECODE_INVAL)
      if constants.INIC_MAC in nic_dict:
        nic_mac = nic_dict[constants.INIC_MAC]
        if nic_mac is None:
          raise errors.OpPrereqError("Cannot set the nic mac to None",
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict[constants.INIC_MAC] = \
            self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceState(self, instance, INSTANCE_DOWN,
                            msg="cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)

    # disabling the instance
    if self.op.offline_inst:
      _CheckInstanceState(self, instance, INSTANCE_DOWN,
                          msg="cannot change instance state to offline")

    # enabling the instance
    if self.op.online_inst:
      _CheckInstanceState(self, instance, INSTANCE_OFFLINE,
                          msg="cannot make instance go online")

  def _ConvertPlainToDrbd(self, feedback_fn):
    """Converts an instance from plain to drbd.

    """
    feedback_fn("Converting template to drbd")
    instance = self.instance
    pnode = instance.primary_node
    snode = self.op.remote_node

    assert instance.disk_template == constants.DT_PLAIN

    # create a fake disk info for _GenerateDiskTemplate
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
                  constants.IDISK_VG: d.logical_id[0]}
                 for d in instance.disks]
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
                                      instance.name, pnode, [snode],
                                      disk_info, None, None, 0, feedback_fn)
    info = _GetInstanceInfoText(instance)
    feedback_fn("Creating additional volumes...")
    # first, create the missing data and meta devices
    for disk in new_disks:
      # unfortunately this is... not too nice
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
                            info, True)
      for child in disk.children:
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
    # at this stage, all new LVs have been created, we can rename the
    # old ones
    feedback_fn("Renaming original volumes...")
    rename_list = [(o, n.children[0].logical_id)
                   for (o, n) in zip(instance.disks, new_disks)]
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
    result.Raise("Failed to rename original LVs")

    feedback_fn("Initializing DRBD devices...")
    # all child devices are in place, we can now create the DRBD devices
    for disk in new_disks:
      for node in [pnode, snode]:
        f_create = node == pnode
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)

    # at this point, the instance has been modified
    instance.disk_template = constants.DT_DRBD8
    instance.disks = new_disks
    self.cfg.Update(instance, feedback_fn)

    # Release node locks while waiting for sync
    _ReleaseLocks(self, locking.LEVEL_NODE)

    # disks are created, waiting for sync
    disk_abort = not _WaitForSync(self, instance,
                                  oneshot=not self.op.wait_for_sync)
    if disk_abort:
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance, please clean up manually")

    # Node resource locks will be released by caller

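  # Note on the disk layout used by the two conversion helpers: a DRBD8 disk
  # object carries its backing logical volumes as children, with
  # disk.children[0] being the data LV and disk.children[1] the DRBD metadata
  # LV, which is why _ConvertPlainToDrbd creates children[1] separately and
  # _ConvertDrbdToPlain below keeps children[0] and discards children[1].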
  def _ConvertDrbdToPlain(self, feedback_fn):
    """Converts an instance from drbd to plain.

    """
    instance = self.instance

    assert len(instance.secondary_nodes) == 1
    assert instance.disk_template == constants.DT_DRBD8

    pnode = instance.primary_node
    snode = instance.secondary_nodes[0]
    feedback_fn("Converting template to plain")

    old_disks = instance.disks
    new_disks = [d.children[0] for d in old_disks]

    # copy over size and mode
    for parent, child in zip(old_disks, new_disks):
      child.size = parent.size
      child.mode = parent.mode

    # update instance structure
    instance.disks = new_disks
    instance.disk_template = constants.DT_PLAIN
    self.cfg.Update(instance, feedback_fn)

    # Release locks in case removing disks takes a while
    _ReleaseLocks(self, locking.LEVEL_NODE)

    feedback_fn("Removing volumes on the secondary node...")
    for disk in old_disks:
      self.cfg.SetDiskID(disk, snode)
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
      if msg:
        self.LogWarning("Could not remove block device %s on node %s,"
                        " continuing anyway: %s", disk.iv_name, snode, msg)

    feedback_fn("Removing unneeded volumes on the primary node...")
    for idx, disk in enumerate(old_disks):
      meta = disk.children[1]
      self.cfg.SetDiskID(meta, pnode)
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
      if msg:
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
                        " continuing anyway: %s", idx, pnode, msg)

    # this is a DRBD disk, return its port to the pool
    for disk in old_disks:
      tcp_port = disk.logical_id[2]
      self.cfg.AddTcpUdpPort(tcp_port)

    # Node resource locks will be released by caller

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    assert ((self.op.disk_template is None) ^
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
      "Not owning any node resource locks"

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))

        # if this is a DRBD disk, return its port to the pool
        if device.dev_type in constants.LDS_DRBD:
          tcp_port = device.logical_id[2]
          self.cfg.AddTcpUdpPort(tcp_port)
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      if __debug__:
        check_nodes = set(instance.all_nodes)
        if self.op.remote_node:
          check_nodes.add(self.op.remote_node)
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
          owned = self.owned_locks(level)
          assert not (check_nodes - owned), \
            ("Not owning the correct locks, owning %r, expected at least %r" %
             (owned, check_nodes))

      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      mode = (instance.disk_template, self.op.disk_template)
      try:
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

      assert instance.disk_template == self.op.disk_template, \
        ("Expected disk template '%s', found '%s'" %
         (self.op.disk_template, instance.disk_template))

    # Release node and resource locks if there are any (they might already have
    # been released during disk conversion)
    _ReleaseLocks(self, locking.LEVEL_NODE)
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # online/offline instance
    if self.op.online_inst:
      self.cfg.MarkInstanceDown(instance.name)
      result.append(("admin_state", constants.ADMINST_DOWN))
    if self.op.offline_inst:
      self.cfg.MarkInstanceOffline(instance.name)
      result.append(("admin_state", constants.ADMINST_OFFLINE))

    self.cfg.Update(instance, feedback_fn)

    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
                self.owned_locks(locking.LEVEL_NODE)), \
      "All node locks should have been released by now"

    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUInstanceChangeGroup(LogicalUnit):
  HPATH = "instance-change-group"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

    self._ExpandAndLockInstance()

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = None

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set(self.req_target_uuids)

        # Lock all groups used by the instance optimistically; this requires
        # going via the node before it's locked, requiring verification later
        # on
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
        lock_groups.update(instance_groups)
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      if self.req_target_uuids:
        # Lock all nodes used by the instance
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
        self._LockInstancesNodes()

        # Lock all nodes in all potential target groups
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
        member_nodes = [node_name
                        for group in lock_groups
                        for node_name in self.cfg.GetNodeGroup(group).members]
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
      else:
        # Lock all nodes as all groups are potential targets
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert (self.req_target_uuids is None or
            owned_groups.issuperset(self.req_target_uuids))
    assert owned_instances == set([self.op.instance_name])

    # Get instance information
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)

    # Check if node groups for locked instance are still correct
    assert owned_nodes.issuperset(self.instance.all_nodes), \
      ("Instance %s's nodes changed while we kept the lock" %
       self.op.instance_name)

    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
                                           owned_groups)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except those used by the instance are potential targets
      self.target_uuids = owned_groups - inst_groups

    conflicting_groups = self.target_uuids & inst_groups
    if conflicting_groups:
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
                                 " used by the instance '%s'" %
                                 (utils.CommaJoin(conflicting_groups),
                                  self.op.instance_name),
                                 errors.ECODE_INVAL)

    if not self.target_uuids:
      raise errors.OpPrereqError("There are no possible target groups",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    assert self.target_uuids

    env = {
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

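  # Exec() below does not move the instance itself: it asks the iallocator in
  # "change group" mode for a placement, converts the answer into follow-up
  # jobs via _LoadNodeEvacResult and hands those back in a ResultWithJobs, so
  # the actual moves happen in the returned jobs rather than in this LU.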
  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert instances == [self.op.instance_name], "Instance not locked"

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=list(self.target_uuids))

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute solution for changing group of"
                                 " instance '%s' using iallocator '%s': %s" %
                                 (self.op.instance_name, self.op.iallocator,
                                  ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for changing group of"
                 " instance '%s'", len(jobs), self.op.instance_name)

    return ResultWithJobs(jobs)


class LUBackupQuery(NoHooksLU):
  """Query the exports list.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
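    # Illustrative return value (node and export names made up):
    #   {"node1.example.com": ["inst1.export", "inst2.export"],
    #    "node2.example.com": False}
    # where False marks a node whose export list could not be retrieved, as
    # handled below.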
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

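  # For local exports Exec() below simply returns None; only remote exports
  # (EXPORT_MODE_REMOTE) get the handshake and signed X509 material computed
  # there, presumably for consumption by the importing side of the remote
  # export.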
  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    """
    instance = self.instance

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      salt = utils.GenerateSecret(8)

      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
      result = self.rpc.call_x509_cert_create(instance.primary_node,
                                              constants.RIE_CERT_VALIDITY)
      result.Raise("Can't create X509 key and certificate on %s" % result.node)

      (name, cert_pem) = result.payload

      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                             cert_pem)

      return {
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
                          salt),
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
        }

    return None


class LUBackupExport(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.x509_key_name = self.op.x509_key_name
    self.dest_x509_ca_pem = self.op.destination_x509_ca

    if self.op.mode == constants.EXPORT_MODE_REMOTE:
      if not self.x509_key_name:
        raise errors.OpPrereqError("Missing X509 key name for encryption",
                                   errors.ECODE_INVAL)

      if not self.dest_x509_ca_pem:
        raise errors.OpPrereqError("Missing destination X509 CA",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    # Lock all nodes for local exports
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      # FIXME: lock only instance primary and destination node
      #
      # Sad but true, for now we have to lock all nodes, as we don't know where
      # the previous export might be, and in this LU we search for it and
      # remove it from its current node. In the future we could fix this by:
      #  - making a tasklet to search (share-lock all), then create the
      #    new one, then one to remove, after
      #  - removing the removal operation altogether
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_MODE": self.op.mode,
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      # TODO: Generic function for boolean env variables
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      nl.append(self.op.target_node)

    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name

    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    if (self.op.remove_instance and
        self.instance.admin_state == constants.ADMINST_UP and
        not self.op.shutdown):
      raise errors.OpPrereqError("Can not remove instance without shutting it"
                                 " down before")

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
      assert self.dst_node is not None

      _CheckNodeOnline(self, self.dst_node.name)
      _CheckNodeNotDrained(self, self.dst_node.name)

      self._cds = None
      self.dest_disk_info = None
      self.dest_x509_ca = None

    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
      self.dst_node = None

      if len(self.op.target_node) != len(self.instance.disks):
        raise errors.OpPrereqError(("Received destination information for %s"
                                    " disks, but instance %s has %s disks") %
                                   (len(self.op.target_node), instance_name,
                                    len(self.instance.disks)),
                                   errors.ECODE_INVAL)

      cds = _GetClusterDomainSecret()

      # Check X509 key name
      try:
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
      except (TypeError, ValueError), err:
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)

      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                                   errors.ECODE_INVAL)

      # Load and verify CA
      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
                                   (msg, ), errors.ECODE_INVAL)

      self.dest_x509_ca = cert

      # Verify target information
      disk_info = []
      for idx, disk_data in enumerate(self.op.target_node):
        try:
          (host, port, magic) = \
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
        except errors.GenericError, err:
          raise errors.OpPrereqError("Target info for disk %s: %s" %
                                     (idx, err), errors.ECODE_INVAL)

        disk_info.append((host, port, magic))

      assert len(disk_info) == len(self.op.target_node)
      self.dest_disk_info = disk_info

    else:
      raise errors.ProgrammerError("Unhandled export mode %r" %
                                   self.op.mode)

    # instance disk type verification
    # TODO: Implement export support for file-based disks
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks", errors.ECODE_INVAL)

  def _CleanupExports(self, feedback_fn):
    """Removes exports of current instance from all other nodes.

    If an instance in a cluster with nodes A..D was exported to node C, its
    exports will be removed from the nodes A, B and D.

    """
    assert self.op.mode != constants.EXPORT_MODE_REMOTE

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(self.dst_node.name)

    # on one-node clusters nodelist will be empty after the removal;
    # if we proceed, the backup would be removed because OpBackupQuery
    # substitutes an empty list with the full cluster node list.
    iname = self.instance.name
    if nodelist:
      feedback_fn("Removing old exports for instance %s" % iname)
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (instance.admin_state != constants.ADMINST_UP)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and
            instance.admin_state == constants.ADMINST_UP and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point, the export was successful, we can cleanup/finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults
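  # Editor's illustrative note (not part of the original code): the value
  # returned above mirrors what the export helper produced, e.g. roughly
  #
  #   (True, [True, True])
  #
  # i.e. whether finalization succeeded plus one boolean per exported disk;
  # the failure handling a few lines earlier is driven by the same pair.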


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not already an existing node
    group.

    """
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]


class LUGroupAssignNodes(NoHooksLU):
  """Logical unit for assigning nodes to groups.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # These raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # We want to lock all the affected nodes and groups. We have readily
    # available the list of nodes, and the *destination* group. To gather the
    # list of "source" groups, we need to fetch node information later on.
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODEGROUP:
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1

      # Try to get all affected nodes' groups without having the group or node
      # lock yet. Needs verification later in the code flow.
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)

      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset(self.op.nodes))

    expected_locks = (set([self.group_uuid]) |
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
    if actual_locks != expected_locks:
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
                               " current groups are '%s', used to be '%s'" %
                               (utils.CommaJoin(expected_locks),
                                utils.CommaJoin(actual_locks)))

    self.node_data = self.cfg.GetAllNodesInfo()
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
                                             for node in self.op.nodes],
                                            self.node_data, instance_data)

    if new_splits:
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

      if not self.op.force:
        raise errors.OpExecError("The following instances get split by this"
                                 " change and --force was not given: %s" %
                                 fmt_new_splits)
      else:
        self.LogWarning("This operation will split the following instances: %s",
                        fmt_new_splits)

        if previous_splits:
          self.LogWarning("In addition, these already-split instances continue"
                          " to be split across groups: %s",
                          utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    """
    for node in self.op.nodes:
      self.node_data[node].group = self.group_uuid

    # FIXME: Depends on side-effects of modifying the result of
    # C{cfg.GetAllNodesInfo}

    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    This method considers a series of node assignments as an atomic operation,
    and returns information about split instances after applying the set of
    changes.

    In particular, it returns information about newly split instances, and
    instances that were already split, and remain so after the change.

    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
    considered.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were
      previously split and that this change does not fix.

    """
    changed_nodes = dict((node, group) for node, group in changes
                         if node_data[node].group != group)

    all_split_instances = set()
    previously_split_instances = set()

    def InstanceNodes(instance):
      return [instance.primary_node] + list(instance.secondary_nodes)

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = InstanceNodes(inst)

      if len(set(node_data[node].group for node in instance_nodes)) > 1:
        previously_split_instances.add(inst.name)

      if len(set(changed_nodes.get(node, node_data[node].group)
                 for node in instance_nodes)) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
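  # Editor's illustrative sketch (not part of the original code): assuming a
  # DRBD instance "inst1" whose primary and secondary nodes ("node1", "node2")
  # both currently live in group "g1", a hypothetical call such as
  #
  #   CheckAssignmentForSplitInstances([("node1", "g2")], node_data, inst_data)
  #
  # would be expected to report "inst1" in the first list (newly split), while
  # an instance that already spanned two groups before the change would show
  # up in the second list instead.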


class _GroupQuery(_QueryBase):
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())

    if not self.names:
      self.wanted = [name_to_uuid[name]
                     for name in utils.NiceSort(name_to_uuid.keys())]
    else:
      # Accept names to be either names or UUIDs.
      missing = []
      self.wanted = []
      all_uuid = frozenset(self._all_groups.keys())

      for name in self.names:
        if name in all_uuid:
          self.wanted.append(name)
        elif name in name_to_uuid:
          self.wanted.append(name_to_uuid[name])
        else:
          missing.append(name)

      if missing:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(missing),
                                   errors.ECODE_NOENT)
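  # Editor's illustrative note (not part of the original code): the loop above
  # accepts either form of group identification, so a query for, e.g.,
  # ["default", "dde33bc0-..."] resolves the first entry via its name and
  # keeps the second as a UUID; anything matching neither ends up in "missing".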
  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    """
    do_nodes = query.GQ_NODE in self.requested_data
    do_instances = query.GQ_INST in self.requested_data

    group_to_nodes = None
    group_to_instances = None

    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
    # latter GetAllInstancesInfo() is not enough, for we have to go through
    # instance->node. Hence, we will need to process nodes even if we only need
    # instance information.
    if do_nodes or do_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
      node_to_group = {}

      for node in all_nodes.values():
        if node.group in group_to_nodes:
          group_to_nodes[node.group].append(node.name)
          node_to_group[node.name] = node.group

      if do_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        group_to_instances = dict((uuid, []) for uuid in self.wanted)

        for instance in all_instances.values():
          node = instance.primary_node
          if node in node_to_group:
            group_to_instances[node_to_group[node]].append(instance.name)

        if not do_nodes:
          # Do not pass on node information if it was not requested.
          group_to_nodes = None

    return query.GroupQueryData([self._all_groups[uuid]
                                 for uuid in self.wanted],
                                group_to_nodes, group_to_instances)


class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.gq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, is empty
    (i.e., contains no nodes), and is not the last group of the cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
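  # Editor's illustrative note (not part of the original code): the result is
  # a list of (path, tag) pairs, e.g. roughly
  #
  #   [("/cluster", "prod"), ("/instances/instance1.example.com", "prod")]
  #
  # for a pattern such as "^prod$" matching a cluster tag and an instance tag.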


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has several sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc_runner, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc_runner
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
13492

    
  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute static (configuration-derived) node data.

    @rtype: dict
    @return: a dict mapping node name to a dict of config-derived node
        attributes

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute dynamic (runtime) node data.

    @param node_results: the basic node structures as filled from the config

    """
    #TODO(dynmem): compute the right data on MAX and MIN memory
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MAXMEM]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MAXMEM] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_state == constants.ADMINST_UP:
              i_p_up_mem += beinfo[constants.BE_MAXMEM]

        # compose the dynamic node result
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results
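
  # Worked example of the memory accounting above (hypothetical numbers): a
  # primary instance with BE_MAXMEM = 1024 MiB that the hypervisor reports
  # as currently using 512 MiB gives i_mem_diff = 1024 - 512 = 512, so the
  # node's "free_memory" is lowered by a further 512 MiB.  In other words
  # the allocator sees each primary instance charged at its maximum memory,
  # not at its current usage.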

    
  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_state": iinfo.admin_state,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MAXMEM],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request
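
  # For example (hypothetical values), relocating a mirrored instance away
  # from its current secondary node would produce a request along the lines
  # of:
  #   {"name": "inst1.example.com", "disk_space_total": 2176,
  #    "required_nodes": 1, "relocate_from": ["node2.example.com"]}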

    
  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

13704
  def _BuildInputData(self, fn, keydata):
13705
    """Build input data structures.
13706

13707
    """
13708
    self._ComputeClusterData()
13709

    
13710
    request = fn()
13711
    request["type"] = self.mode
13712
    for keyname, keytype in keydata:
13713
      if keyname not in request:
13714
        raise errors.ProgrammerError("Request parameter %s is missing" %
13715
                                     keyname)
13716
      val = request[keyname]
13717
      if not keytype(val):
13718
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
13719
                                     " validation, value %s, expected"
13720
                                     " type %s" % (keyname, val, keytype))
13721
    self.in_data["request"] = request
13722

    
13723
    self.in_text = serializer.Dump(self.in_data)
13724

    
13725
  _STRING_LIST = ht.TListOf(ht.TString)
13726
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
13727
     # pylint: disable=E1101
13728
     # Class '...' has no 'OP_ID' member
13729
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
13730
                          opcodes.OpInstanceMigrate.OP_ID,
13731
                          opcodes.OpInstanceReplaceDisks.OP_ID])
13732
     })))
13733

    
13734
  _NEVAC_MOVED = \
13735
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
13736
                       ht.TItems([ht.TNonEmptyString,
13737
                                  ht.TNonEmptyString,
13738
                                  ht.TListOf(ht.TNonEmptyString),
13739
                                 ])))
13740
  _NEVAC_FAILED = \
13741
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
13742
                       ht.TItems([ht.TNonEmptyString,
13743
                                  ht.TMaybeString,
13744
                                 ])))
13745
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
13746
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
13747

    
13748
  _MODE_DATA = {
13749
    constants.IALLOCATOR_MODE_ALLOC:
13750
      (_AddNewInstance,
13751
       [
13752
        ("name", ht.TString),
13753
        ("memory", ht.TInt),
13754
        ("disks", ht.TListOf(ht.TDict)),
13755
        ("disk_template", ht.TString),
13756
        ("os", ht.TString),
13757
        ("tags", _STRING_LIST),
13758
        ("nics", ht.TListOf(ht.TDict)),
13759
        ("vcpus", ht.TInt),
13760
        ("hypervisor", ht.TString),
13761
        ], ht.TList),
13762
    constants.IALLOCATOR_MODE_RELOC:
13763
      (_AddRelocateInstance,
13764
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
13765
       ht.TList),
13766
     constants.IALLOCATOR_MODE_NODE_EVAC:
13767
      (_AddNodeEvacuate, [
13768
        ("instances", _STRING_LIST),
13769
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
13770
        ], _NEVAC_RESULT),
13771
     constants.IALLOCATOR_MODE_CHG_GROUP:
13772
      (_AddChangeGroup, [
13773
        ("instances", _STRING_LIST),
13774
        ("target_groups", _STRING_LIST),
13775
        ], _NEVAC_RESULT),
13776
    }
13777
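
  # As an example (hypothetical values), constructing
  #   IAllocator(cfg, rpc, constants.IALLOCATOR_MODE_RELOC,
  #              name="inst1.example.com",
  #              relocate_from=["node2.example.com"])
  # must supply exactly the keys listed for the relocation entry above, and
  # the external script's "result" field will later be checked against
  # ht.TList by _ValidateResult.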

    
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other result attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result))

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
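
  # A well-formed reply from the external script deserializes to something
  # like the following (hypothetical example for an allocation request):
  #   {"success": True, "info": "allocation successful",
  #    "result": ["node3.example.com"]}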

    
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
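
  # Example (hypothetical data): with node2group = {"n1": "g1-uuid",
  # "n2": "g2-uuid"} and groups = {"g1-uuid": {"name": "default"}}, calling
  # _NodesToGroups(node2group, groups, ["n1", "n2", "unknown"]) returns
  # ["default", "g2-uuid"]: unknown nodes are skipped, and a group UUID
  # missing from the groups mapping is reported as the UUID itself.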

    
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode '%s' in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
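
# Usage sketch (illustrative only, hypothetical caller): a query LU that
# needs the node implementation would do something like
#   impl = _GetQueryImplementation(constants.QR_NODE)
# and get _NodeQuery back, while an unknown resource name raises
# OpPrereqError.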