root / lib / cmdlib.py @ 63742d78

#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have waaaay too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools
import operator

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes
from ganeti import ht
from ganeti import rpc

import ganeti.masterd.instance # pylint: disable=W0611


#: Size of DRBD meta block device
DRBD_META_SIZE = 128


class ResultWithJobs:
  """Data container for LU results with jobs.

  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  contained in the C{jobs} attribute and include the job IDs in the opcode
  result.

  """
  def __init__(self, jobs, **kwargs):
    """Initializes this class.

    Additional return values can be specified as keyword arguments.

    @type jobs: list of lists of L{opcodes.OpCode}
    @param jobs: A list of lists of opcode objects

    """
    self.jobs = jobs
    self.other = kwargs
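

# Illustrative sketch (editor's addition, not part of the original module):
# an LU that wants follow-up jobs submitted on its behalf returns a
# ResultWithJobs from Exec; mcpu.Processor._ProcessResult then submits them.
# The opcode below is only an example payload and the function is never
# called by this module.
def _ExampleResultWithJobs():
  """Builds a result carrying one job with a single opcode.

  """
  jobs = [[opcodes.OpClusterVerifyConfig(ignore_errors=[])]]
  # Any extra keyword arguments are kept in the C{other} attribute
  return ResultWithJobs(jobs, warnings=[])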


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - implement BuildHooksNodes
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc_runner):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.glm = context.glm
    # readability alias
    self.owned_locks = context.glm.list_owned
    self.context = context
    self.rpc = rpc_runner
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    # logging
    self.Log = processor.Log # pylint: disable=C0103
    self.LogWarning = processor.LogWarning # pylint: disable=C0103
    self.LogInfo = processor.LogInfo # pylint: disable=C0103
    self.LogStep = processor.LogStep # pylint: disable=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    # Validate opcode parameters and set defaults
    self.op.Validate(True)

    self.CheckArguments()

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      pass

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    @rtype: dict
    @return: Dictionary containing the environment that will be used for
      running the hooks for this LU. The keys of the dict must not be prefixed
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
      will extend the environment with additional variables. If no environment
      should be defined, an empty dictionary should be returned (not C{None}).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def BuildHooksNodes(self):
    """Build list of nodes to run LU's hooks.

    @rtype: tuple; (list, list)
    @return: Tuple containing a list of node names on which the hook
      should run before the execution and a list of node names on which the
      hook should run after the execution. No nodes should be returned as an
      empty list (and not None).
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
      will not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False,
                          level=locking.LEVEL_NODE):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances
    @param level: Which lock level to use for locking nodes

    """
    assert level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[level] == constants.LOCKS_REPLACE:
      self.needed_locks[level] = wanted_nodes
    elif self.recalculate_locks[level] == constants.LOCKS_APPEND:
      self.needed_locks[level].extend(wanted_nodes)
    else:
      raise errors.ProgrammerError("Unknown recalculation mode")

    del self.recalculate_locks[level]
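

# Illustrative sketch (editor's addition, not part of the original module):
# how a hypothetical concurrent LU combines _ExpandAndLockInstance with
# _LockInstancesNodes. The class is never registered or instantiated here;
# opcode fields and behaviour are assumptions for the example only.
class _LUExampleInstanceNoop(LogicalUnit):
  """Hypothetical LU locking one instance and its nodes, then doing nothing.

  """
  HPATH = None
  HTYPE = None
  REQ_BGL = False

  def ExpandNames(self):
    # Expands self.op.instance_name and declares the instance-level lock
    self._ExpandAndLockInstance()
    # Node locks can only be computed once the instance lock is held, so
    # they are declared empty here and recalculated in DeclareLocks
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    # HPATH is None, so no hooks are run for this LU
    feedback_fn("Would operate on %s" % self.op.instance_name)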


class NoHooksLU(LogicalUnit): # pylint: disable=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLU.

    This just raises an error.

    """
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")

  def BuildHooksNodes(self):
    """Empty BuildHooksNodes for NoHooksLU.

    """
    raise AssertionError("BuildHooksNodes called for NoHooksLU")


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    pass

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


class _QueryBase:
  """Base for query utility classes.

  """
  #: Attribute holding field definitions
  FIELDS = None

  def __init__(self, qfilter, fields, use_locking):
    """Initializes this class.

    """
    self.use_locking = use_locking

    self.query = query.Query(self.FIELDS, fields, qfilter=qfilter,
                             namefield="name")
    self.requested_data = self.query.RequestedData()
    self.names = self.query.RequestedNames()

    # Sort only if no names were requested
    self.sort_by_name = not self.names

    self.do_locking = None
    self.wanted = None

  def _GetNames(self, lu, all_names, lock_level):
    """Helper function to determine names asked for in the query.

    """
    if self.do_locking:
      names = lu.owned_locks(lock_level)
    else:
      names = all_names

    if self.wanted == locking.ALL_SET:
      assert not self.names
      # caller didn't specify names, so ordering is not important
      return utils.NiceSort(names)

    # caller specified names and we must keep the same order
    assert self.names
    assert not self.do_locking or lu.glm.is_owned(lock_level)

    missing = set(self.wanted).difference(names)
    if missing:
      raise errors.OpExecError("Some items were removed before retrieving"
                               " their data: %s" % missing)

    # Return expanded names
    return self.wanted

  def ExpandNames(self, lu):
    """Expand names for this query.

    See L{LogicalUnit.ExpandNames}.

    """
    raise NotImplementedError()

  def DeclareLocks(self, lu, level):
    """Declare locks for this query.

    See L{LogicalUnit.DeclareLocks}.

    """
    raise NotImplementedError()

  def _GetQueryData(self, lu):
    """Collects all data for this query.

    @return: Query data object

    """
    raise NotImplementedError()

  def NewStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
                                  sort_by_name=self.sort_by_name)

  def OldStyleQuery(self, lu):
    """Collect data and execute query.

    """
    return self.query.OldStyleQuery(self._GetQueryData(lu),
                                    sort_by_name=self.sort_by_name)


def _ShareAll():
  """Returns a dict declaring all lock levels shared.

  """
  return dict.fromkeys(locking.LEVELS, 1)
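

# Illustrative sketch (editor's addition, not part of the original module):
# read-only LUs typically take their locks in shared mode; this call pattern
# is shown on a hypothetical "lu" argument and is never invoked here.
def _ExampleDeclareSharedLocks(lu):
  """Declares all node locks as shared on the given (hypothetical) LU.

  """
  lu.share_locks = _ShareAll()
  lu.needed_locks = {
    locking.LEVEL_NODE: locking.ALL_SET,
    }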


def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
  """Checks if the owned node groups are still correct for an instance.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type instance_name: string
  @param instance_name: Instance name
  @type owned_groups: set or frozenset
  @param owned_groups: List of currently owned node groups

  """
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)

  if not owned_groups.issuperset(inst_groups):
    raise errors.OpPrereqError("Instance %s's node groups changed since"
                               " locks were acquired, current groups"
                               " are '%s', owning groups '%s'; retry the"
                               " operation" %
                               (instance_name,
                                utils.CommaJoin(inst_groups),
                                utils.CommaJoin(owned_groups)),
                               errors.ECODE_STATE)

  return inst_groups


def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
  """Checks if the instances in a node group are still correct.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type group_uuid: string
  @param group_uuid: Node group UUID
  @type owned_instances: set or frozenset
  @param owned_instances: List of currently owned instances

  """
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
  if owned_instances != wanted_instances:
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
                               " locks were acquired, wanted '%s', have '%s';"
                               " retry the operation" %
                               (group_uuid,
                                utils.CommaJoin(wanted_instances),
                                utils.CommaJoin(owned_instances)),
                               errors.ECODE_STATE)

  return wanted_instances


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]


def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.ProgrammerError: if the nodes parameter is of a wrong type

  """
  if nodes:
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]

  return utils.NiceSort(lu.cfg.GetNodeList())


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is of a wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _GetUpdatedParams(old_params, update_dict,
                      use_default=True, use_none=False):
  """Return the new version of a parameter dictionary.

  @type old_params: dict
  @param old_params: old parameters
  @type update_dict: dict
  @param update_dict: dict containing new parameter values, or
      constants.VALUE_DEFAULT to reset the parameter to its default
      value
  @type use_default: boolean
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
      values as 'to be deleted' values
  @type use_none: boolean
  @param use_none: whether to recognise C{None} values as 'to be
      deleted' values
  @rtype: dict
  @return: the new parameter dictionary

  """
  params_copy = copy.deepcopy(old_params)
  for key, val in update_dict.iteritems():
    if ((use_default and val == constants.VALUE_DEFAULT) or
        (use_none and val is None)):
      try:
        del params_copy[key]
      except KeyError:
        pass
    else:
      params_copy[key] = val
  return params_copy
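

# Illustrative sketch (editor's addition, not part of the original module):
# the merge/reset semantics of _GetUpdatedParams, using made-up parameter
# names. The function is never called by this module.
def _ExampleGetUpdatedParams():
  """Returns {"vcpus": 4}: "vcpus" is overridden, "memory" is reset.

  """
  old_params = {"memory": 512, "vcpus": 2}
  update = {"vcpus": 4, "memory": constants.VALUE_DEFAULT}
  # VALUE_DEFAULT removes the key so the cluster-level default applies again
  return _GetUpdatedParams(old_params, update)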


def _ReleaseLocks(lu, level, names=None, keep=None):
  """Releases locks owned by an LU.

  @type lu: L{LogicalUnit}
  @param level: Lock level
  @type names: list or None
  @param names: Names of locks to release
  @type keep: list or None
  @param keep: Names of locks to retain

  """
  assert not (keep is not None and names is not None), \
         "Only one of the 'names' and the 'keep' parameters can be given"

  if names is not None:
    should_release = names.__contains__
  elif keep:
    should_release = lambda name: name not in keep
  else:
    should_release = None

  if should_release:
    retain = []
    release = []

    # Determine which locks to release
    for name in lu.owned_locks(level):
      if should_release(name):
        release.append(name)
      else:
        retain.append(name)

    assert len(lu.owned_locks(level)) == (len(retain) + len(release))

    # Release just some locks
    lu.glm.release(level, names=release)

    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
  else:
    # Release everything
    lu.glm.release(level)

    assert not lu.glm.is_owned(level), "No locks should be owned"
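

# Illustrative sketch (editor's addition, not part of the original module):
# once an LU knows which nodes it really needs, it can release the other
# node locks early. Both arguments are assumed inputs; never called here.
def _ExampleReleaseUnneededNodeLocks(lu, needed_nodes):
  """Keeps only the node locks named in C{needed_nodes}, releasing the rest.

  """
  _ReleaseLocks(lu, locking.LEVEL_NODE, keep=needed_nodes)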


def _MapInstanceDisksToNodes(instances):
  """Creates a map from (node, volume) to instance name.

  @type instances: list of L{objects.Instance}
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value

  """
  return dict(((node, vol), inst.name)
              for inst in instances
              for (node, vols) in inst.MapLVsByNode().items()
              for vol in vols)


def _RunPostHook(lu, node_name):
  """Runs the post-hook for an opcode on a single node.

  """
  hm = lu.proc.BuildHooksManager(lu)
  try:
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
  except:
    # pylint: disable=W0702
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node, msg=None):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param msg: if passed, should be a message to replace the default one
  @raise errors.OpPrereqError: if the node is offline

  """
  if msg is None:
    msg = "Can't use offline node"
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeVmCapable(lu, node):
  """Ensure that a given node is vm capable.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is not vm capable

  """
  if not lu.cfg.GetNodeInfo(node).vm_capable:
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
                               errors.ECODE_STATE)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node is not supporting the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
  """Ensure that a node has the given secondary ip.

  @type lu: L{LogicalUnit}
  @param lu: the LU on behalf of which we make the check
  @type node: string
  @param node: the node to check
  @type secondary_ip: string
  @param secondary_ip: the ip to check
  @type prereq: boolean
  @param prereq: whether to throw a prerequisite or an execute error
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False

  """
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
  result.Raise("Failure checking secondary ip on node %s" % node,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
  if not result.payload:
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
           " please fix and re-run this command" % secondary_ip)
    if prereq:
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
    else:
      raise errors.OpExecError(msg)


def _GetClusterDomainSecret():
  """Reads the cluster domain secret.

  """
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
                               strict=True)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instances."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name, tags):
  """Builds instance related env variables for hooks.

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @type tags: list
  @param tags: list of instance tags as strings
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  if not tags:
    tags = []

  env["INSTANCE_TAGS"] = " ".join(tags)

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
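

# Illustrative sketch (editor's addition, not part of the original module):
# building the hook environment for a fictional one-NIC, one-disk instance.
# All names and values below are made up; the function is never called here.
def _ExampleInstanceHookEnv():
  """Returns a dict with keys such as INSTANCE_NAME and INSTANCE_NIC0_MAC.

  """
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                               ["node2.example.com"], "debian-image", True,
                               512, 1,
                               [("198.51.100.10", "aa:00:00:11:22:33",
                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
                               constants.DT_DRBD8, [(10240, "rw")],
                               {}, {}, constants.HT_XEN_PVM, ["example-tag"])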


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUInstanceQueryData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  cluster = lu.cfg.GetClusterInfo()
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    "name": instance.name,
    "primary_node": instance.primary_node,
    "secondary_nodes": instance.secondary_nodes,
    "os_type": instance.os,
    "status": instance.admin_up,
    "memory": bep[constants.BE_MEMORY],
    "vcpus": bep[constants.BE_VCPUS],
    "nics": _NICListToTuple(lu, instance.nics),
    "disk_template": instance.disk_template,
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
    "bep": bep,
    "hvp": hvp,
    "hypervisor_name": instance.hypervisor,
    "tags": instance.tags,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max with one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  """
  cluster = lu.cfg.GetClusterInfo()
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  variant = objects.OS.GetVariant(name)
  if not os_obj.supported_variants:
    if variant:
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
                                 " passed)" % (os_obj.name, variant),
                                 errors.ECODE_INVAL)
    return
  if not variant:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc_runner, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc_runner.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found;"
                                 " please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator")


def _GetDefaultIAllocator(cfg, iallocator):
  """Decides on which iallocator to use.

  @type cfg: L{config.ConfigWriter}
  @param cfg: Cluster configuration object
  @type iallocator: string or None
  @param iallocator: Iallocator specified in opcode
  @rtype: string
  @return: Iallocator name

  """
  if not iallocator:
    # Use default iallocator
    iallocator = cfg.GetDefaultIAllocator()

  if not iallocator:
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
                               " opcode nor as a cluster-wide default",
                               errors.ECODE_INVAL)

  return iallocator


class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [self.cfg.GetMasterNode()])

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master_params = self.cfg.GetMasterNetworkParameters()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master_params.name)

    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    return master_params.name


def _VerifyCertificate(filename):
  """Verifies a certificate for L{LUClusterVerifyConfig}.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable=W0703
    return (LUClusterVerifyConfig.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  (errcode, msg) = \
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
                                constants.SSL_CERT_EXPIRATION_ERROR)

  if msg:
    fnamemsg = "While verifying %s: %s" % (filename, msg)
  else:
    fnamemsg = None

  if errcode is None:
    return (None, fnamemsg)
  elif errcode == utils.CERT_WARNING:
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
  elif errcode == utils.CERT_ERROR:
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)

  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)


def _GetAllHypervisorParameters(cluster, instances):
  """Compute the set of all hypervisor parameters.

  @type cluster: L{objects.Cluster}
  @param cluster: the cluster object
  @type instances: list of L{objects.Instance}
  @param instances: additional instances from which to obtain parameters
  @rtype: list of (origin, hypervisor, parameters)
  @return: a list with all parameters found, indicating the hypervisor they
       apply to, and the origin (can be "cluster", "os X", or "instance Y")

  """
  hvp_data = []

  for hv_name in cluster.enabled_hypervisors:
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))

  for os_name, os_hvp in cluster.os_hvp.items():
    for hv_name, hv_params in os_hvp.items():
      if hv_params:
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
        hvp_data.append(("os %s" % os_name, hv_name, full_params))

  # TODO: collapse identical parameter values in a single one
  for instance in instances:
    if instance.hvparams:
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
                       cluster.FillHV(instance)))

  return hvp_data


class _VerifyErrors(object):
  """Mix-in for cluster/group verify LUs.

  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
  self.op and self._feedback_fn to be available.)

  """

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt, _ = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes: # This is a mix-in. pylint: disable=E1101
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable=E1101

  def _ErrorIf(self, cond, ecode, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = (bool(cond)
            or self.op.debug_simulate_errors) # pylint: disable=E1101

    # If the error code is in the list of ignored errors, demote the error to a
    # warning
    (_, etxt, _) = ecode
    if etxt in self.op.ignore_errors:     # pylint: disable=E1101
      kwargs[self.ETYPE_FIELD] = self.ETYPE_WARNING

    if cond:
      self._Error(ecode, *args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond
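

# Illustrative sketch (editor's addition, not part of the original module):
# how a verify LU built on this mix-in reports problems. The class is a
# hypothetical example and is never instantiated by this module.
class _ExampleVerifyErrorsUser(_VerifyErrors):
  """Shows the _ErrorIf calling convention used by the verify LUs.

  """
  def _ExampleCheck(self, config_errors):
    # Each constants.CV_* code is a (item type, error text, description)
    # tuple; the message is reported (and self.bad updated) only when the
    # condition in the first argument is true.
    self._ErrorIf(bool(config_errors), constants.CV_ECLUSTERCFG, None,
                  "configuration problems found: %s",
                  utils.CommaJoin(config_errors))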


class LUClusterVerify(NoHooksLU):
  """Submits all jobs necessary to verify the cluster.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    jobs = []

    if self.op.group_name:
      groups = [self.op.group_name]
      depends_fn = lambda: None
    else:
      groups = self.cfg.GetNodeGroupList()

      # Verify global configuration
      jobs.append([
        opcodes.OpClusterVerifyConfig(ignore_errors=self.op.ignore_errors)
        ])

      # Always depend on global verification
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                            ignore_errors=self.op.ignore_errors,
                                            depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER)
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, constants.CV_ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, constants.CV_ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), constants.CV_ECLUSTERDANGLINGNODES,
                  None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), constants.CV_ECLUSTERDANGLINGINST,
                  None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return not self.bad


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
1638
  HTYPE = constants.HTYPE_CLUSTER
1639
  REQ_BGL = False
1640

    
1641
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1642

    
1643
  class NodeImage(object):
1644
    """A class representing the logical and physical status of a node.
1645

1646
    @type name: string
1647
    @ivar name: the node name to which this object refers
1648
    @ivar volumes: a structure as returned from
1649
        L{ganeti.backend.GetVolumeList} (runtime)
1650
    @ivar instances: a list of running instances (runtime)
1651
    @ivar pinst: list of configured primary instances (config)
1652
    @ivar sinst: list of configured secondary instances (config)
1653
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1654
        instances for which this node is secondary (config)
1655
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1656
    @ivar dfree: free disk, as reported by the node (runtime)
1657
    @ivar offline: the offline status (config)
1658
    @type rpc_fail: boolean
1659
    @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1660
        not whether the individual keys were correct) (runtime)
1661
    @type lvm_fail: boolean
1662
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1663
    @type hyp_fail: boolean
1664
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1665
    @type ghost: boolean
1666
    @ivar ghost: whether this is a known node or not (config)
1667
    @type os_fail: boolean
1668
    @ivar os_fail: whether the RPC call didn't return valid OS data
1669
    @type oslist: list
1670
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1671
    @type vm_capable: boolean
1672
    @ivar vm_capable: whether the node can host instances
1673

1674
    """
1675
    def __init__(self, offline=False, name=None, vm_capable=True):
1676
      self.name = name
1677
      self.volumes = {}
1678
      self.instances = []
1679
      self.pinst = []
1680
      self.sinst = []
1681
      self.sbp = {}
1682
      self.mfree = 0
1683
      self.dfree = 0
1684
      self.offline = offline
1685
      self.vm_capable = vm_capable
1686
      self.rpc_fail = False
1687
      self.lvm_fail = False
1688
      self.hyp_fail = False
1689
      self.ghost = False
1690
      self.os_fail = False
1691
      self.oslist = {}
1692

    
1693
  def ExpandNames(self):
1694
    # This raises errors.OpPrereqError on its own:
1695
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1696

    
1697
    # Get instances in node group; this is unsafe and needs verification later
1698
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1699

    
1700
    self.needed_locks = {
1701
      locking.LEVEL_INSTANCE: inst_names,
1702
      locking.LEVEL_NODEGROUP: [self.group_uuid],
1703
      locking.LEVEL_NODE: [],
1704
      }
1705

    
1706
    self.share_locks = _ShareAll()
1707

    
1708
  def DeclareLocks(self, level):
1709
    if level == locking.LEVEL_NODE:
1710
      # Get members of node group; this is unsafe and needs verification later
1711
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1712

    
1713
      all_inst_info = self.cfg.GetAllInstancesInfo()
1714

    
1715
      # In Exec(), we warn about mirrored instances that have primary and
1716
      # secondary living in separate node groups. To fully verify that
1717
      # volumes for these instances are healthy, we will need to do an
1718
      # extra call to their secondaries. We ensure here those nodes will
1719
      # be locked.
1720
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
1721
        # Important: access only the instances whose lock is owned
1722
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1723
          nodes.update(all_inst_info[inst].secondary_nodes)
1724

    
1725
      self.needed_locks[locking.LEVEL_NODE] = nodes
1726

    
1727
  def CheckPrereq(self):
1728
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
1729
    self.group_info = self.cfg.GetNodeGroup(self.group_uuid)
1730

    
1731
    group_nodes = set(self.group_info.members)
1732
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1733

    
1734
    unlocked_nodes = \
1735
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1736

    
1737
    unlocked_instances = \
1738
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))
1739

    
1740
    if unlocked_nodes:
1741
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
1742
                                 utils.CommaJoin(unlocked_nodes))
1743

    
1744
    if unlocked_instances:
1745
      raise errors.OpPrereqError("Missing lock for instances: %s" %
1746
                                 utils.CommaJoin(unlocked_instances))
1747

    
1748
    self.all_node_info = self.cfg.GetAllNodesInfo()
1749
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1750

    
1751
    self.my_node_names = utils.NiceSort(group_nodes)
1752
    self.my_inst_names = utils.NiceSort(group_instances)
1753

    
1754
    self.my_node_info = dict((name, self.all_node_info[name])
1755
                             for name in self.my_node_names)
1756

    
1757
    self.my_inst_info = dict((name, self.all_inst_info[name])
1758
                             for name in self.my_inst_names)
1759

    
1760
    # We detect here the nodes that will need the extra RPC calls for verifying
1761
    # split LV volumes; they should be locked.
1762
    extra_lv_nodes = set()
1763

    
1764
    for inst in self.my_inst_info.values():
1765
      if inst.disk_template in constants.DTS_INT_MIRROR:
1766
        group = self.my_node_info[inst.primary_node].group
1767
        for nname in inst.secondary_nodes:
1768
          if self.all_node_info[nname].group != group:
1769
            extra_lv_nodes.add(nname)
1770

    
1771
    unlocked_lv_nodes = \
1772
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))
1773

    
1774
    if unlocked_lv_nodes:
1775
      raise errors.OpPrereqError("these nodes could be locked: %s" %
1776
                                 utils.CommaJoin(unlocked_lv_nodes))
1777
    self.extra_lv_nodes = list(extra_lv_nodes)
1778

    
1779
  def _VerifyNode(self, ninfo, nresult):
1780
    """Perform some basic validation on data returned from a node.
1781

1782
      - check the result data structure is well formed and has all the
1783
        mandatory fields
1784
      - check ganeti version
1785

1786
    @type ninfo: L{objects.Node}
1787
    @param ninfo: the node to check
1788
    @param nresult: the results from the node
1789
    @rtype: boolean
1790
    @return: whether overall this call was successful (and we can expect
1791
         reasonable values in the respose)
1792

1793
    """
1794
    node = ninfo.name
1795
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1796

    
1797
    # main result, nresult should be a non-empty dict
1798
    test = not nresult or not isinstance(nresult, dict)
1799
    _ErrorIf(test, constants.CV_ENODERPC, node,
1800
                  "unable to verify node: no data returned")
1801
    if test:
1802
      return False
1803

    
1804
    # compares ganeti version
1805
    local_version = constants.PROTOCOL_VERSION
1806
    remote_version = nresult.get("version", None)
1807
    test = not (remote_version and
1808
                isinstance(remote_version, (list, tuple)) and
1809
                len(remote_version) == 2)
1810
    _ErrorIf(test, constants.CV_ENODERPC, node,
1811
             "connection to node returned invalid data")
1812
    if test:
1813
      return False
1814

    
1815
    test = local_version != remote_version[0]
1816
    _ErrorIf(test, constants.CV_ENODEVERSION, node,
1817
             "incompatible protocol versions: master %s,"
1818
             " node %s", local_version, remote_version[0])
1819
    if test:
1820
      return False
1821

    
1822
    # node seems compatible, we can actually try to look into its results
1823

    
1824
    # full package version
1825
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1826
                  constants.CV_ENODEVERSION, node,
1827
                  "software version mismatch: master %s, node %s",
1828
                  constants.RELEASE_VERSION, remote_version[1],
1829
                  code=self.ETYPE_WARNING)
1830

    
1831
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1832
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1833
      for hv_name, hv_result in hyp_result.iteritems():
1834
        test = hv_result is not None
1835
        _ErrorIf(test, constants.CV_ENODEHV, node,
1836
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1837

    
1838
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1839
    if ninfo.vm_capable and isinstance(hvp_result, list):
1840
      for item, hv_name, hv_result in hvp_result:
1841
        _ErrorIf(True, constants.CV_ENODEHV, node,
1842
                 "hypervisor %s parameter verify failure (source %s): %s",
1843
                 hv_name, item, hv_result)
1844

    
1845
    test = nresult.get(constants.NV_NODESETUP,
1846
                       ["Missing NODESETUP results"])
1847
    _ErrorIf(test, constants.CV_ENODESETUP, node, "node setup error: %s",
1848
             "; ".join(test))
1849

    
1850
    return True
1851

    
1852
  def _VerifyNodeTime(self, ninfo, nresult,
1853
                      nvinfo_starttime, nvinfo_endtime):
1854
    """Check the node time.
1855

1856
    @type ninfo: L{objects.Node}
1857
    @param ninfo: the node to check
1858
    @param nresult: the remote results for the node
1859
    @param nvinfo_starttime: the start time of the RPC call
1860
    @param nvinfo_endtime: the end time of the RPC call
1861

1862
    """
1863
    node = ninfo.name
1864
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1865

    
1866
    ntime = nresult.get(constants.NV_TIME, None)
1867
    try:
1868
      ntime_merged = utils.MergeTime(ntime)
1869
    except (ValueError, TypeError):
1870
      _ErrorIf(True, constants.CV_ENODETIME, node, "Node returned invalid time")
1871
      return
1872

    
1873
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1874
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1875
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1876
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1877
    else:
1878
      ntime_diff = None
1879

    
1880
    _ErrorIf(ntime_diff is not None, constants.CV_ENODETIME, node,
1881
             "Node time diverges by at least %s from master node time",
1882
             ntime_diff)
1883

    
1884
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1885
    """Check the node LVM results.
1886

1887
    @type ninfo: L{objects.Node}
1888
    @param ninfo: the node to check
1889
    @param nresult: the remote results for the node
1890
    @param vg_name: the configured VG name
1891

1892
    """
1893
    if vg_name is None:
1894
      return
1895

    
1896
    node = ninfo.name
1897
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1898

    
1899
    # checks vg existence and size > 20G
1900
    vglist = nresult.get(constants.NV_VGLIST, None)
1901
    test = not vglist
1902
    _ErrorIf(test, constants.CV_ENODELVM, node, "unable to check volume groups")
1903
    if not test:
1904
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1905
                                            constants.MIN_VG_SIZE)
1906
      _ErrorIf(vgstatus, constants.CV_ENODELVM, node, vgstatus)
1907

    
1908
    # check pv names
1909
    pvlist = nresult.get(constants.NV_PVLIST, None)
1910
    test = pvlist is None
1911
    _ErrorIf(test, constants.CV_ENODELVM, node, "Can't get PV list from node")
1912
    if not test:
1913
      # check that ':' is not present in PV names, since it's a
1914
      # special character for lvcreate (denotes the range of PEs to
1915
      # use on the PV)
1916
      for _, pvname, owner_vg in pvlist:
1917
        test = ":" in pvname
1918
        _ErrorIf(test, constants.CV_ENODELVM, node,
1919
                 "Invalid character ':' in PV '%s' of VG '%s'",
1920
                 pvname, owner_vg)
1921

    
1922
  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1923
    """Check the node bridges.
1924

1925
    @type ninfo: L{objects.Node}
1926
    @param ninfo: the node to check
1927
    @param nresult: the remote results for the node
1928
    @param bridges: the expected list of bridges
1929

1930
    """
1931
    if not bridges:
1932
      return
1933

    
1934
    node = ninfo.name
1935
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1936

    
1937
    missing = nresult.get(constants.NV_BRIDGES, None)
1938
    test = not isinstance(missing, list)
1939
    _ErrorIf(test, constants.CV_ENODENET, node,
1940
             "did not return valid bridge information")
1941
    if not test:
1942
      _ErrorIf(bool(missing), constants.CV_ENODENET, node,
1943
               "missing bridges: %s" % utils.CommaJoin(sorted(missing)))
1944

    
1945
  def _VerifyNodeUserScripts(self, ninfo, nresult):
1946
    """Check the results of user scripts presence and executability on the node
1947

1948
    @type ninfo: L{objects.Node}
1949
    @param ninfo: the node to check
1950
    @param nresult: the remote results for the node
1951

1952
    """
1953
    node = ninfo.name
1954

    
1955
    test = not constants.NV_USERSCRIPTS in nresult
1956
    self._ErrorIf(test, constants.CV_ENODEUSERSCRIPTS, node,
1957
                  "did not return user scripts information")
1958

    
1959
    broken_scripts = nresult.get(constants.NV_USERSCRIPTS, None)
1960
    if not test:
1961
      self._ErrorIf(broken_scripts, constants.CV_ENODEUSERSCRIPTS, node,
1962
                    "user scripts not present or not executable: %s" %
1963
                    utils.CommaJoin(sorted(broken_scripts)))
1964

    
1965
  def _VerifyNodeNetwork(self, ninfo, nresult):
1966
    """Check the node network connectivity results.
1967

1968
    @type ninfo: L{objects.Node}
1969
    @param ninfo: the node to check
1970
    @param nresult: the remote results for the node
1971

1972
    """
1973
    node = ninfo.name
1974
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
1975

    
1976
    test = constants.NV_NODELIST not in nresult
1977
    _ErrorIf(test, constants.CV_ENODESSH, node,
1978
             "node hasn't returned node ssh connectivity data")
1979
    if not test:
1980
      if nresult[constants.NV_NODELIST]:
1981
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1982
          _ErrorIf(True, constants.CV_ENODESSH, node,
1983
                   "ssh communication with node '%s': %s", a_node, a_msg)
1984

    
1985
    test = constants.NV_NODENETTEST not in nresult
1986
    _ErrorIf(test, constants.CV_ENODENET, node,
1987
             "node hasn't returned node tcp connectivity data")
1988
    if not test:
1989
      if nresult[constants.NV_NODENETTEST]:
1990
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1991
        for anode in nlist:
1992
          _ErrorIf(True, constants.CV_ENODENET, node,
1993
                   "tcp communication with node '%s': %s",
1994
                   anode, nresult[constants.NV_NODENETTEST][anode])
1995

    
1996
    test = constants.NV_MASTERIP not in nresult
1997
    _ErrorIf(test, constants.CV_ENODENET, node,
1998
             "node hasn't returned node master IP reachability data")
1999
    if not test:
2000
      if not nresult[constants.NV_MASTERIP]:
2001
        if node == self.master_node:
2002
          msg = "the master node cannot reach the master IP (not configured?)"
2003
        else:
2004
          msg = "cannot reach the master IP"
2005
        _ErrorIf(True, constants.CV_ENODENET, node, msg)
2006

    
2007
  def _VerifyInstance(self, instance, instanceconfig, node_image,
2008
                      diskstatus):
2009
    """Verify an instance.
2010

2011
    This function checks to see if the required block devices are
2012
    available on the instance's node.
2013

2014
    """
2015
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2016
    node_current = instanceconfig.primary_node
2017

    
2018
    node_vol_should = {}
2019
    instanceconfig.MapLVsByNode(node_vol_should)
2020

    
2021
    for node in node_vol_should:
2022
      n_img = node_image[node]
2023
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2024
        # ignore missing volumes on offline or broken nodes
2025
        continue
2026
      for volume in node_vol_should[node]:
2027
        test = volume not in n_img.volumes
2028
        _ErrorIf(test, constants.CV_EINSTANCEMISSINGDISK, instance,
2029
                 "volume %s missing on node %s", volume, node)
2030

    
2031
    if instanceconfig.admin_up:
2032
      pri_img = node_image[node_current]
2033
      test = instance not in pri_img.instances and not pri_img.offline
2034
      _ErrorIf(test, constants.CV_EINSTANCEDOWN, instance,
2035
               "instance not running on its primary node %s",
2036
               node_current)
2037

    
2038
    diskdata = [(nname, success, status, idx)
2039
                for (nname, disks) in diskstatus.items()
2040
                for idx, (success, status) in enumerate(disks)]
2041

    
2042
    for nname, success, bdev_status, idx in diskdata:
2043
      # the 'ghost node' construction in Exec() ensures that we have a
2044
      # node here
2045
      snode = node_image[nname]
2046
      bad_snode = snode.ghost or snode.offline
2047
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
2048
               constants.CV_EINSTANCEFAULTYDISK, instance,
2049
               "couldn't retrieve status for disk/%s on %s: %s",
2050
               idx, nname, bdev_status)
2051
      _ErrorIf((instanceconfig.admin_up and success and
2052
                bdev_status.ldisk_status == constants.LDS_FAULTY),
2053
               constants.CV_EINSTANCEFAULTYDISK, instance,
2054
               "disk/%s on %s is faulty", idx, nname)
2055

    
2056
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
2057
    """Verify if there are any unknown volumes in the cluster.
2058

2059
    The .os, .swap and backup volumes are ignored. All other volumes are
2060
    reported as unknown.
2061

2062
    @type reserved: L{ganeti.utils.FieldSet}
2063
    @param reserved: a FieldSet of reserved volume names
2064

2065
    """
2066
    for node, n_img in node_image.items():
2067
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
2068
        # skip non-healthy nodes
2069
        continue
2070
      for volume in n_img.volumes:
2071
        test = ((node not in node_vol_should or
2072
                volume not in node_vol_should[node]) and
2073
                not reserved.Matches(volume))
2074
        self._ErrorIf(test, constants.CV_ENODEORPHANLV, node,
2075
                      "volume %s is unknown", volume)
2076

    
2077
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
2078
    """Verify N+1 Memory Resilience.
2079

2080
    Check that if one single node dies we can still start all the
2081
    instances it was primary for.
2082

2083
    """
2084
    cluster_info = self.cfg.GetClusterInfo()
2085
    for node, n_img in node_image.items():
2086
      # This code checks that every node which is now listed as
2087
      # secondary has enough memory to host all instances it is
2088
      # supposed to should a single other node in the cluster fail.
2089
      # FIXME: not ready for failover to an arbitrary node
2090
      # FIXME: does not support file-backed instances
2091
      # WARNING: we currently take into account down instances as well
2092
      # as up ones, considering that even if they're down someone
2093
      # might want to start them even in the event of a node failure.
2094
      if n_img.offline:
2095
        # we're skipping offline nodes from the N+1 warning, since
2096
        # most likely we don't have good memory infromation from them;
2097
        # we already list instances living on such nodes, and that's
2098
        # enough warning
2099
        continue
2100
      for prinode, instances in n_img.sbp.items():
2101
        needed_mem = 0
2102
        for instance in instances:
2103
          bep = cluster_info.FillBE(instance_cfg[instance])
2104
          if bep[constants.BE_AUTO_BALANCE]:
2105
            needed_mem += bep[constants.BE_MEMORY]
2106
        test = n_img.mfree < needed_mem
2107
        self._ErrorIf(test, constants.CV_ENODEN1, node,
2108
                      "not enough memory to accomodate instance failovers"
2109
                      " should node %s fail (%dMiB needed, %dMiB available)",
2110
                      prinode, needed_mem, n_img.mfree)
2111

    
2112
  @classmethod
2113
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2114
                   (files_all, files_opt, files_mc, files_vm)):
2115
    """Verifies file checksums collected from all nodes.
2116

2117
    @param errorif: Callback for reporting errors
2118
    @param nodeinfo: List of L{objects.Node} objects
2119
    @param master_node: Name of master node
2120
    @param all_nvinfo: RPC results
2121

2122
    """
2123
    # Define functions determining which nodes to consider for a file
2124
    files2nodefn = [
2125
      (files_all, None),
2126
      (files_mc, lambda node: (node.master_candidate or
2127
                               node.name == master_node)),
2128
      (files_vm, lambda node: node.vm_capable),
2129
      ]
2130

    
2131
    # Build mapping from filename to list of nodes which should have the file
2132
    nodefiles = {}
2133
    for (files, fn) in files2nodefn:
2134
      if fn is None:
2135
        filenodes = nodeinfo
2136
      else:
2137
        filenodes = filter(fn, nodeinfo)
2138
      nodefiles.update((filename,
2139
                        frozenset(map(operator.attrgetter("name"), filenodes)))
2140
                       for filename in files)
2141

    
2142
    assert set(nodefiles) == (files_all | files_mc | files_vm)
2143

    
2144
    fileinfo = dict((filename, {}) for filename in nodefiles)
2145
    ignore_nodes = set()
2146

    
2147
    for node in nodeinfo:
2148
      if node.offline:
2149
        ignore_nodes.add(node.name)
2150
        continue
2151

    
2152
      nresult = all_nvinfo[node.name]
2153

    
2154
      if nresult.fail_msg or not nresult.payload:
2155
        node_files = None
2156
      else:
2157
        node_files = nresult.payload.get(constants.NV_FILELIST, None)
2158

    
2159
      test = not (node_files and isinstance(node_files, dict))
2160
      errorif(test, constants.CV_ENODEFILECHECK, node.name,
2161
              "Node did not return file checksum data")
2162
      if test:
2163
        ignore_nodes.add(node.name)
2164
        continue
2165

    
2166
      # Build per-checksum mapping from filename to nodes having it
2167
      for (filename, checksum) in node_files.items():
2168
        assert filename in nodefiles
2169
        fileinfo[filename].setdefault(checksum, set()).add(node.name)
2170

    
2171
    for (filename, checksums) in fileinfo.items():
2172
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2173

    
2174
      # Nodes having the file
2175
      with_file = frozenset(node_name
2176
                            for nodes in fileinfo[filename].values()
2177
                            for node_name in nodes) - ignore_nodes
2178

    
2179
      expected_nodes = nodefiles[filename] - ignore_nodes
2180

    
2181
      # Nodes missing file
2182
      missing_file = expected_nodes - with_file
2183

    
2184
      if filename in files_opt:
2185
        # All or no nodes
2186
        errorif(missing_file and missing_file != expected_nodes,
2187
                constants.CV_ECLUSTERFILECHECK, None,
2188
                "File %s is optional, but it must exist on all or no"
2189
                " nodes (not found on %s)",
2190
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2191
      else:
2192
        errorif(missing_file, constants.CV_ECLUSTERFILECHECK, None,
2193
                "File %s is missing from node(s) %s", filename,
2194
                utils.CommaJoin(utils.NiceSort(missing_file)))
2195

    
2196
        # Warn if a node has a file it shouldn't
2197
        unexpected = with_file - expected_nodes
2198
        errorif(unexpected,
2199
                constants.CV_ECLUSTERFILECHECK, None,
2200
                "File %s should not exist on node(s) %s",
2201
                filename, utils.CommaJoin(utils.NiceSort(unexpected)))
2202

    
2203
      # See if there are multiple versions of the file
2204
      test = len(checksums) > 1
2205
      if test:
2206
        variants = ["variant %s on %s" %
2207
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2208
                    for (idx, (checksum, nodes)) in
2209
                      enumerate(sorted(checksums.items()))]
2210
      else:
2211
        variants = []
2212

    
2213
      errorif(test, constants.CV_ECLUSTERFILECHECK, None,
2214
              "File %s found with %s different checksums (%s)",
2215
              filename, len(checksums), "; ".join(variants))
2216

    
2217
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2218
                      drbd_map):
2219
    """Verifies and the node DRBD status.
2220

2221
    @type ninfo: L{objects.Node}
2222
    @param ninfo: the node to check
2223
    @param nresult: the remote results for the node
2224
    @param instanceinfo: the dict of instances
2225
    @param drbd_helper: the configured DRBD usermode helper
2226
    @param drbd_map: the DRBD map as returned by
2227
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2228

2229
    """
2230
    node = ninfo.name
2231
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2232

    
2233
    if drbd_helper:
2234
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2235
      test = (helper_result == None)
2236
      _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2237
               "no drbd usermode helper returned")
2238
      if helper_result:
2239
        status, payload = helper_result
2240
        test = not status
2241
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2242
                 "drbd usermode helper check unsuccessful: %s", payload)
2243
        test = status and (payload != drbd_helper)
2244
        _ErrorIf(test, constants.CV_ENODEDRBDHELPER, node,
2245
                 "wrong drbd usermode helper: %s", payload)
2246

    
2247
    # compute the DRBD minors
2248
    node_drbd = {}
2249
    for minor, instance in drbd_map[node].items():
2250
      test = instance not in instanceinfo
2251
      _ErrorIf(test, constants.CV_ECLUSTERCFG, None,
2252
               "ghost instance '%s' in temporary DRBD map", instance)
2253
        # ghost instance should not be running, but otherwise we
2254
        # don't give double warnings (both ghost instance and
2255
        # unallocated minor in use)
2256
      if test:
2257
        node_drbd[minor] = (instance, False)
2258
      else:
2259
        instance = instanceinfo[instance]
2260
        node_drbd[minor] = (instance.name, instance.admin_up)
2261

    
2262
    # and now check them
2263
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
2264
    test = not isinstance(used_minors, (tuple, list))
2265
    _ErrorIf(test, constants.CV_ENODEDRBD, node,
2266
             "cannot parse drbd status file: %s", str(used_minors))
2267
    if test:
2268
      # we cannot check drbd status
2269
      return
2270

    
2271
    for minor, (iname, must_exist) in node_drbd.items():
2272
      test = minor not in used_minors and must_exist
2273
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2274
               "drbd minor %d of instance %s is not active", minor, iname)
2275
    for minor in used_minors:
2276
      test = minor not in node_drbd
2277
      _ErrorIf(test, constants.CV_ENODEDRBD, node,
2278
               "unallocated drbd minor %d is in use", minor)
2279

    
2280
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
2281
    """Builds the node OS structures.
2282

2283
    @type ninfo: L{objects.Node}
2284
    @param ninfo: the node to check
2285
    @param nresult: the remote results for the node
2286
    @param nimg: the node image object
2287

2288
    """
2289
    node = ninfo.name
2290
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2291

    
2292
    remote_os = nresult.get(constants.NV_OSLIST, None)
2293
    test = (not isinstance(remote_os, list) or
2294
            not compat.all(isinstance(v, list) and len(v) == 7
2295
                           for v in remote_os))
2296

    
2297
    _ErrorIf(test, constants.CV_ENODEOS, node,
2298
             "node hasn't returned valid OS data")
2299

    
2300
    nimg.os_fail = test
2301

    
2302
    if test:
2303
      return
2304

    
2305
    os_dict = {}
2306

    
2307
    for (name, os_path, status, diagnose,
2308
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2309

    
2310
      if name not in os_dict:
2311
        os_dict[name] = []
2312

    
2313
      # parameters is a list of lists instead of list of tuples due to
2314
      # JSON lacking a real tuple type, fix it:
2315
      parameters = [tuple(v) for v in parameters]
2316
      os_dict[name].append((os_path, status, diagnose,
2317
                            set(variants), set(parameters), set(api_ver)))
2318

    
2319
    nimg.oslist = os_dict
2320

    
2321
  def _VerifyNodeOS(self, ninfo, nimg, base):
2322
    """Verifies the node OS list.
2323

2324
    @type ninfo: L{objects.Node}
2325
    @param ninfo: the node to check
2326
    @param nimg: the node image object
2327
    @param base: the 'template' node we match against (e.g. from the master)
2328

2329
    """
2330
    node = ninfo.name
2331
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2332

    
2333
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2334

    
2335
    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2336
    for os_name, os_data in nimg.oslist.items():
2337
      assert os_data, "Empty OS status for OS %s?!" % os_name
2338
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2339
      _ErrorIf(not f_status, constants.CV_ENODEOS, node,
2340
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2341
      _ErrorIf(len(os_data) > 1, constants.CV_ENODEOS, node,
2342
               "OS '%s' has multiple entries (first one shadows the rest): %s",
2343
               os_name, utils.CommaJoin([v[0] for v in os_data]))
2344
      # comparisons with the 'base' image
2345
      test = os_name not in base.oslist
2346
      _ErrorIf(test, constants.CV_ENODEOS, node,
2347
               "Extra OS %s not present on reference node (%s)",
2348
               os_name, base.name)
2349
      if test:
2350
        continue
2351
      assert base.oslist[os_name], "Base node has empty OS status?"
2352
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2353
      if not b_status:
2354
        # base OS is invalid, skipping
2355
        continue
2356
      for kind, a, b in [("API version", f_api, b_api),
2357
                         ("variants list", f_var, b_var),
2358
                         ("parameters", beautify_params(f_param),
2359
                          beautify_params(b_param))]:
2360
        _ErrorIf(a != b, constants.CV_ENODEOS, node,
2361
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2362
                 kind, os_name, base.name,
2363
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2364

    
2365
    # check any missing OSes
2366
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2367
    _ErrorIf(missing, constants.CV_ENODEOS, node,
2368
             "OSes present on reference node %s but missing on this node: %s",
2369
             base.name, utils.CommaJoin(missing))
2370

    
2371
  def _VerifyOob(self, ninfo, nresult):
2372
    """Verifies out of band functionality of a node.
2373

2374
    @type ninfo: L{objects.Node}
2375
    @param ninfo: the node to check
2376
    @param nresult: the remote results for the node
2377

2378
    """
2379
    node = ninfo.name
2380
    # We just have to verify the paths on master and/or master candidates
2381
    # as the oob helper is invoked on the master
2382
    if ((ninfo.master_candidate or ninfo.master_capable) and
2383
        constants.NV_OOB_PATHS in nresult):
2384
      for path_result in nresult[constants.NV_OOB_PATHS]:
2385
        self._ErrorIf(path_result, constants.CV_ENODEOOBPATH, node, path_result)
2386

    
2387
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2388
    """Verifies and updates the node volume data.
2389

2390
    This function will update a L{NodeImage}'s internal structures
2391
    with data from the remote call.
2392

2393
    @type ninfo: L{objects.Node}
2394
    @param ninfo: the node to check
2395
    @param nresult: the remote results for the node
2396
    @param nimg: the node image object
2397
    @param vg_name: the configured VG name
2398

2399
    """
2400
    node = ninfo.name
2401
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2402

    
2403
    nimg.lvm_fail = True
2404
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2405
    if vg_name is None:
2406
      pass
2407
    elif isinstance(lvdata, basestring):
2408
      _ErrorIf(True, constants.CV_ENODELVM, node, "LVM problem on node: %s",
2409
               utils.SafeEncode(lvdata))
2410
    elif not isinstance(lvdata, dict):
2411
      _ErrorIf(True, constants.CV_ENODELVM, node,
2412
               "rpc call to node failed (lvlist)")
2413
    else:
2414
      nimg.volumes = lvdata
2415
      nimg.lvm_fail = False
2416

    
2417
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2418
    """Verifies and updates the node instance list.
2419

2420
    If the listing was successful, then updates this node's instance
2421
    list. Otherwise, it marks the RPC call as failed for the instance
2422
    list key.
2423

2424
    @type ninfo: L{objects.Node}
2425
    @param ninfo: the node to check
2426
    @param nresult: the remote results for the node
2427
    @param nimg: the node image object
2428

2429
    """
2430
    idata = nresult.get(constants.NV_INSTANCELIST, None)
2431
    test = not isinstance(idata, list)
2432
    self._ErrorIf(test, constants.CV_ENODEHV, ninfo.name,
2433
                  "rpc call to node failed (instancelist): %s",
2434
                  utils.SafeEncode(str(idata)))
2435
    if test:
2436
      nimg.hyp_fail = True
2437
    else:
2438
      nimg.instances = idata
2439

    
2440
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2441
    """Verifies and computes a node information map
2442

2443
    @type ninfo: L{objects.Node}
2444
    @param ninfo: the node to check
2445
    @param nresult: the remote results for the node
2446
    @param nimg: the node image object
2447
    @param vg_name: the configured VG name
2448

2449
    """
2450
    node = ninfo.name
2451
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2452

    
2453
    # try to read free memory (from the hypervisor)
2454
    hv_info = nresult.get(constants.NV_HVINFO, None)
2455
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2456
    _ErrorIf(test, constants.CV_ENODEHV, node,
2457
             "rpc call to node failed (hvinfo)")
2458
    if not test:
2459
      try:
2460
        nimg.mfree = int(hv_info["memory_free"])
2461
      except (ValueError, TypeError):
2462
        _ErrorIf(True, constants.CV_ENODERPC, node,
2463
                 "node returned invalid nodeinfo, check hypervisor")
2464

    
2465
    # FIXME: devise a free space model for file based instances as well
2466
    if vg_name is not None:
2467
      test = (constants.NV_VGLIST not in nresult or
2468
              vg_name not in nresult[constants.NV_VGLIST])
2469
      _ErrorIf(test, constants.CV_ENODELVM, node,
2470
               "node didn't return data for the volume group '%s'"
2471
               " - it is either missing or broken", vg_name)
2472
      if not test:
2473
        try:
2474
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2475
        except (ValueError, TypeError):
2476
          _ErrorIf(True, constants.CV_ENODERPC, node,
2477
                   "node returned invalid LVM info, check LVM status")
2478

    
2479
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2480
    """Gets per-disk status information for all instances.
2481

2482
    @type nodelist: list of strings
2483
    @param nodelist: Node names
2484
    @type node_image: dict of (name, L{objects.Node})
2485
    @param node_image: Node objects
2486
    @type instanceinfo: dict of (name, L{objects.Instance})
2487
    @param instanceinfo: Instance objects
2488
    @rtype: {instance: {node: [(succes, payload)]}}
2489
    @return: a dictionary of per-instance dictionaries with nodes as
2490
        keys and disk information as values; the disk information is a
2491
        list of tuples (success, payload)
2492

2493
    """
2494
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2495

    
2496
    node_disks = {}
2497
    node_disks_devonly = {}
2498
    diskless_instances = set()
2499
    diskless = constants.DT_DISKLESS
2500

    
2501
    for nname in nodelist:
2502
      node_instances = list(itertools.chain(node_image[nname].pinst,
2503
                                            node_image[nname].sinst))
2504
      diskless_instances.update(inst for inst in node_instances
2505
                                if instanceinfo[inst].disk_template == diskless)
2506
      disks = [(inst, disk)
2507
               for inst in node_instances
2508
               for disk in instanceinfo[inst].disks]
2509

    
2510
      if not disks:
2511
        # No need to collect data
2512
        continue
2513

    
2514
      node_disks[nname] = disks
2515

    
2516
      # Creating copies as SetDiskID below will modify the objects and that can
2517
      # lead to incorrect data returned from nodes
2518
      devonly = [dev.Copy() for (_, dev) in disks]
2519

    
2520
      for dev in devonly:
2521
        self.cfg.SetDiskID(dev, nname)
2522

    
2523
      node_disks_devonly[nname] = devonly
2524

    
2525
    assert len(node_disks) == len(node_disks_devonly)
2526

    
2527
    # Collect data from all nodes with disks
2528
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2529
                                                          node_disks_devonly)
2530

    
2531
    assert len(result) == len(node_disks)
2532

    
2533
    instdisk = {}
2534

    
2535
    for (nname, nres) in result.items():
2536
      disks = node_disks[nname]
2537

    
2538
      if nres.offline:
2539
        # No data from this node
2540
        data = len(disks) * [(False, "node offline")]
2541
      else:
2542
        msg = nres.fail_msg
2543
        _ErrorIf(msg, constants.CV_ENODERPC, nname,
2544
                 "while getting disk information: %s", msg)
2545
        if msg:
2546
          # No data from this node
2547
          data = len(disks) * [(False, msg)]
2548
        else:
2549
          data = []
2550
          for idx, i in enumerate(nres.payload):
2551
            if isinstance(i, (tuple, list)) and len(i) == 2:
2552
              data.append(i)
2553
            else:
2554
              logging.warning("Invalid result from node %s, entry %d: %s",
2555
                              nname, idx, i)
2556
              data.append((False, "Invalid result from the remote node"))
2557

    
2558
      for ((inst, _), status) in zip(disks, data):
2559
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2560

    
2561
    # Add empty entries for diskless instances.
2562
    for inst in diskless_instances:
2563
      assert inst not in instdisk
2564
      instdisk[inst] = {}
2565

    
2566
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2567
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2568
                      compat.all(isinstance(s, (tuple, list)) and
2569
                                 len(s) == 2 for s in statuses)
2570
                      for inst, nnames in instdisk.items()
2571
                      for nname, statuses in nnames.items())
2572
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2573

    
2574
    return instdisk
2575

    
2576
  @staticmethod
2577
  def _SshNodeSelector(group_uuid, all_nodes):
2578
    """Create endless iterators for all potential SSH check hosts.
2579

2580
    """
2581
    nodes = [node for node in all_nodes
2582
             if (node.group != group_uuid and
2583
                 not node.offline)]
2584
    keyfunc = operator.attrgetter("group")
2585

    
2586
    return map(itertools.cycle,
2587
               [sorted(map(operator.attrgetter("name"), names))
2588
                for _, names in itertools.groupby(sorted(nodes, key=keyfunc),
2589
                                                  keyfunc)])
2590

    
2591
  @classmethod
2592
  def _SelectSshCheckNodes(cls, group_nodes, group_uuid, all_nodes):
2593
    """Choose which nodes should talk to which other nodes.
2594

2595
    We will make nodes contact all nodes in their group, and one node from
2596
    every other group.
2597

2598
    @warning: This algorithm has a known issue if one node group is much
2599
      smaller than others (e.g. just one node). In such a case all other
2600
      nodes will talk to the single node.
2601

2602
    """
2603
    online_nodes = sorted(node.name for node in group_nodes if not node.offline)
2604
    sel = cls._SshNodeSelector(group_uuid, all_nodes)
2605

    
2606
    return (online_nodes,
2607
            dict((name, sorted([i.next() for i in sel]))
2608
                 for name in online_nodes))
2609

    
2610
  def BuildHooksEnv(self):
2611
    """Build hooks env.
2612

2613
    Cluster-Verify hooks just ran in the post phase and their failure makes
2614
    the output be logged in the verify output and the verification to fail.
2615

2616
    """
2617
    env = {
2618
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2619
      }
2620

    
2621
    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2622
               for node in self.my_node_info.values())
2623

    
2624
    return env
2625

    
2626
  def BuildHooksNodes(self):
2627
    """Build hooks nodes.
2628

2629
    """
2630
    return ([], self.my_node_names)
2631

    
2632
  def Exec(self, feedback_fn):
2633
    """Verify integrity of the node group, performing various test on nodes.
2634

2635
    """
2636
    # This method has too many local variables. pylint: disable=R0914
2637
    feedback_fn("* Verifying group '%s'" % self.group_info.name)
2638

    
2639
    if not self.my_node_names:
2640
      # empty node group
2641
      feedback_fn("* Empty node group, skipping verification")
2642
      return True
2643

    
2644
    self.bad = False
2645
    _ErrorIf = self._ErrorIf # pylint: disable=C0103
2646
    verbose = self.op.verbose
2647
    self._feedback_fn = feedback_fn
2648

    
2649
    vg_name = self.cfg.GetVGName()
2650
    drbd_helper = self.cfg.GetDRBDHelper()
2651
    cluster = self.cfg.GetClusterInfo()
2652
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2653
    hypervisors = cluster.enabled_hypervisors
2654
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2655

    
2656
    i_non_redundant = [] # Non redundant instances
2657
    i_non_a_balanced = [] # Non auto-balanced instances
2658
    n_offline = 0 # Count of offline nodes
2659
    n_drained = 0 # Count of nodes being drained
2660
    node_vol_should = {}
2661

    
2662
    # FIXME: verify OS list
2663

    
2664
    # File verification
2665
    filemap = _ComputeAncillaryFiles(cluster, False)
2666

    
2667
    # do local checksums
2668
    master_node = self.master_node = self.cfg.GetMasterNode()
2669
    master_ip = self.cfg.GetMasterIP()
2670

    
2671
    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2672

    
2673
    user_scripts = []
2674
    if self.cfg.GetUseExternalMipScript():
2675
      user_scripts.append(constants.EXTERNAL_MASTER_SETUP_SCRIPT)
2676

    
2677
    node_verify_param = {
2678
      constants.NV_FILELIST:
2679
        utils.UniqueSequence(filename
2680
                             for files in filemap
2681
                             for filename in files),
2682
      constants.NV_NODELIST:
2683
        self._SelectSshCheckNodes(node_data_list, self.group_uuid,
2684
                                  self.all_node_info.values()),
2685
      constants.NV_HYPERVISOR: hypervisors,
2686
      constants.NV_HVPARAMS:
2687
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2688
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2689
                                 for node in node_data_list
2690
                                 if not node.offline],
2691
      constants.NV_INSTANCELIST: hypervisors,
2692
      constants.NV_VERSION: None,
2693
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2694
      constants.NV_NODESETUP: None,
2695
      constants.NV_TIME: None,
2696
      constants.NV_MASTERIP: (master_node, master_ip),
2697
      constants.NV_OSLIST: None,
2698
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2699
      constants.NV_USERSCRIPTS: user_scripts,
2700
      }
2701

    
2702
    if vg_name is not None:
2703
      node_verify_param[constants.NV_VGLIST] = None
2704
      node_verify_param[constants.NV_LVLIST] = vg_name
2705
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2706
      node_verify_param[constants.NV_DRBDLIST] = None
2707

    
2708
    if drbd_helper:
2709
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2710

    
2711
    # bridge checks
2712
    # FIXME: this needs to be changed per node-group, not cluster-wide
2713
    bridges = set()
2714
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2715
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2716
      bridges.add(default_nicpp[constants.NIC_LINK])
2717
    for instance in self.my_inst_info.values():
2718
      for nic in instance.nics:
2719
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
2720
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2721
          bridges.add(full_nic[constants.NIC_LINK])
2722

    
2723
    if bridges:
2724
      node_verify_param[constants.NV_BRIDGES] = list(bridges)
2725

    
2726
    # Build our expected cluster state
2727
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2728
                                                 name=node.name,
2729
                                                 vm_capable=node.vm_capable))
2730
                      for node in node_data_list)
2731

    
2732
    # Gather OOB paths
2733
    oob_paths = []
2734
    for node in self.all_node_info.values():
2735
      path = _SupportsOob(self.cfg, node)
2736
      if path and path not in oob_paths:
2737
        oob_paths.append(path)
2738

    
2739
    if oob_paths:
2740
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2741

    
2742
    for instance in self.my_inst_names:
2743
      inst_config = self.my_inst_info[instance]
2744

    
2745
      for nname in inst_config.all_nodes:
2746
        if nname not in node_image:
2747
          gnode = self.NodeImage(name=nname)
2748
          gnode.ghost = (nname not in self.all_node_info)
2749
          node_image[nname] = gnode
2750

    
2751
      inst_config.MapLVsByNode(node_vol_should)
2752

    
2753
      pnode = inst_config.primary_node
2754
      node_image[pnode].pinst.append(instance)
2755

    
2756
      for snode in inst_config.secondary_nodes:
2757
        nimg = node_image[snode]
2758
        nimg.sinst.append(instance)
2759
        if pnode not in nimg.sbp:
2760
          nimg.sbp[pnode] = []
2761
        nimg.sbp[pnode].append(instance)
2762

    
2763
    # At this point, we have the in-memory data structures complete,
2764
    # except for the runtime information, which we'll gather next
2765

    
2766
    # Due to the way our RPC system works, exact response times cannot be
2767
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2768
    # time before and after executing the request, we can at least have a time
2769
    # window.
2770
    nvinfo_starttime = time.time()
2771
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2772
                                           node_verify_param,
2773
                                           self.cfg.GetClusterName())
2774
    nvinfo_endtime = time.time()
2775

    
2776
    if self.extra_lv_nodes and vg_name is not None:
2777
      extra_lv_nvinfo = \
2778
          self.rpc.call_node_verify(self.extra_lv_nodes,
2779
                                    {constants.NV_LVLIST: vg_name},
2780
                                    self.cfg.GetClusterName())
2781
    else:
2782
      extra_lv_nvinfo = {}
2783

    
2784
    all_drbd_map = self.cfg.ComputeDRBDMap()
2785

    
2786
    feedback_fn("* Gathering disk information (%s nodes)" %
2787
                len(self.my_node_names))
2788
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2789
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, constants.CV_ENODERPC, node, "while contacting node: %s",
               msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeUserScripts(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, constants.CV_EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, constants.CV_ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               constants.CV_ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               constants.CV_EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               constants.CV_EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]
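        # Note: the sort key above is true only for the group that contains
        # the primary node, so with reverse=True that group is listed first.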

        self._ErrorIf(len(instance_groups) > 1,
                      constants.CV_EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, constants.CV_ENODERPC,
                 snode, "instance %s, connection to secondary node failed",
                 instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, constants.CV_EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, constants.CV_EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave
          # an error.
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, constants.CV_ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = False

    return lu_result
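    # Illustrative sketch (hook name made up): each res.payload entry above
    # is a (script, hkr, output) tuple, e.g.
    #   ("01-example", constants.HKR_FAIL, "some output")
    # and only HKR_FAIL entries mark the verification result as failed.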


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])
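    # Illustrative sketch (group names made up): with groups "default" and
    # "storage" this submits two jobs, each consisting of a single
    # OpGroupVerifyDisks opcode for one group.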


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(list(key))

    return (res_nodes, list(res_instances), res_missing)
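    # Illustrative sketch (names made up): a possible return value is
    #   ({"node1": "error message"}, ["inst2"],
    #    {"inst3": [["node2", "xenvg/disk0"]]})
    # i.e. per-node errors, instances needing activate-disks, missing LVs.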


class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE_RES: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE_RES: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE_RES and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True, level=level)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False
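    # Illustrative sketch (sizes made up): for a 10240 MiB DRBD8 disk whose
    # data child has a recorded size of only 10236 MiB, the child's size is
    # bumped to 10240 and True is returned so the caller updates the config.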

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    assert not (frozenset(per_node_disks.keys()) -
                self.owned_locks(locking.LEVEL_NODE_RES)), \
      "Not owning correct locks"
    assert not self.owned_locks(locking.LEVEL_NODE)

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
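    # Note on units: blockdev_getsize is assumed to report sizes in bytes,
    # so the "size >> 20" above converts to MiB before comparing with
    # disk.size (e.g. 10737418240 bytes >> 20 == 10240 MiB).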


class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                     master_params, ems)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master_params.name)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      master_params.ip = new_ip
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


def _ValidateNetmask(cfg, netmask):
  """Checks if a netmask is valid.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type netmask: int
  @param netmask: the netmask to be verified
  @raise errors.OpPrereqError: if the validation fails

  """
  ip_family = cfg.GetPrimaryIPFamily()
  try:
    ipcls = netutils.IPAddress.GetClassFromIpFamily(ip_family)
  except errors.ProgrammerError:
    raise errors.OpPrereqError("Invalid primary ip family: %s." %
                               ip_family)
  if not ipcls.ValidateNetmask(netmask):
    raise errors.OpPrereqError("CIDR netmask (%s) not valid" %
                                (netmask))
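  # Illustrative sketch: on an IPv4 cluster a prefix length such as 24 is
  # accepted here, while a value like 33 fails ValidateNetmask and raises
  # OpPrereqError.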


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

    if self.op.master_netmask is not None:
      _ValidateNetmask(self.cfg, self.op.master_netmask)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    if self.op.use_external_mip_script is not None:
      self.cluster.use_external_mip_script = self.op.use_external_mip_script

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      ems = self.cfg.GetUseExternalMipScript()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_deactivate_master_ip(master_params.name,
                                                       master_params, ems)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (master_params.netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    if self.op.master_netmask:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Changing master IP netmask to %s" % self.op.master_netmask)
      result = self.rpc.call_node_change_master_netmask(master_params.name,
                                                        master_params.netmask,
                                                        self.op.master_netmask,
                                                        master_params.ip,
                                                        master_params.netdev)
      if result.fail_msg:
        msg = "Could not change the master IP netmask: %s" % result.fail_msg
        feedback_fn(msg)

      self.cluster.master_netmask = self.op.master_netmask

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      master_params = self.cfg.GetMasterNetworkParameters()
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      ems = self.cfg.GetUseExternalMipScript()
      result = self.rpc.call_node_activate_master_ip(master_params.name,
                                                     master_params, ems)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    constants.SPICE_CERT_FILE,
    constants.SPICE_CACERT_FILE,
    constants.RAPI_USERS_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
  else:
    # we need to ship at least the RAPI certificate
    files_all.add(constants.RAPI_CERT_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which are optional, these must:
  # - be present in one other category as well
  # - either exist or not exist on all nodes of that category (mc, vm all)
  files_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

    # FIXME: this should also be replicated but Ganeti doesn't support files_mc
    # replication
    files_mc.add(constants.DEFAULT_MASTER_SETUP_SCRIPT)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[0])

  files_opt |= set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles()[1])

  # Filenames in each category must be unique
  all_files_set = files_all | files_mc | files_vm
  assert (len(all_files_set) ==
          sum(map(len, [files_all, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  # Optional files must be present in one other category
  assert all_files_set.issuperset(files_opt), \
         "Optional file not in a different required list"

  return (files_all, files_opt, files_mc, files_vm)
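  # The four sets returned above are, respectively: files for all nodes
  # (e.g. the known_hosts file), optional files (e.g. the RAPI users file),
  # master-candidate-only files (the cluster config, unless redistributing)
  # and files for VM-capable nodes (per-hypervisor ancillary files).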


def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, _, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


class LUClusterActivateMasterIp(NoHooksLU):
  """Activate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Activate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    self.rpc.call_node_activate_master_ip(master_params.name,
                                          master_params, ems)


class LUClusterDeactivateMasterIp(NoHooksLU):
  """Deactivate the master IP on the master node.

  """
  def Exec(self, feedback_fn):
    """Deactivate the master IP.

    """
    master_params = self.cfg.GetMasterNetworkParameters()
    ems = self.cfg.GetUseExternalMipScript()
    self.rpc.call_node_deactivate_master_ip(master_params.name, master_params,
                                            ems)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
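  # Polling behaviour for reference: RPC failures are retried up to 10 times
  # with a 6 second pause, a degraded-but-done state is re-checked up to 10
  # times at 1 second intervals, and otherwise the loop sleeps for the
  # estimated remaining time, capped at 60 seconds.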


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret
4217

    
4218
  def _CheckPayload(self, result):
4219
    """Checks if the payload is valid.
4220

4221
    @param result: RPC result
4222
    @raises errors.OpExecError: If payload is not valid
4223

4224
    """
4225
    errs = []
4226
    if self.op.command == constants.OOB_HEALTH:
4227
      if not isinstance(result.payload, list):
4228
        errs.append("command 'health' is expected to return a list but got %s" %
4229
                    type(result.payload))
4230
      else:
4231
        for item, status in result.payload:
4232
          if status not in constants.OOB_STATUSES:
4233
            errs.append("health item '%s' has invalid status '%s'" %
4234
                        (item, status))
4235

    
4236
    if self.op.command == constants.OOB_POWER_STATUS:
4237
      if not isinstance(result.payload, dict):
4238
        errs.append("power-status is expected to return a dict but got %s" %
4239
                    type(result.payload))
4240

    
4241
    if self.op.command in [
4242
        constants.OOB_POWER_ON,
4243
        constants.OOB_POWER_OFF,
4244
        constants.OOB_POWER_CYCLE,
4245
        ]:
4246
      if result.payload is not None:
4247
        errs.append("%s is expected to not return payload but got '%s'" %
4248
                    (self.op.command, result.payload))
4249

    
4250
    if errs:
4251
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4252
                               utils.CommaJoin(errs))
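
  # Illustrative payload shapes accepted by _CheckPayload (the values below
  # are made up; the status strings must be members of constants.OOB_STATUSES):
  #   OOB_HEALTH:       a list of (item, status) pairs, e.g.
  #                     [("PSU0", constants.OOB_STATUS_WARNING), ...]
  #   OOB_POWER_STATUS: a dict such as
  #                     {constants.OOB_POWER_STATUS_POWERED: True}
  #   OOB_POWER_ON/OFF/CYCLE: no payload at all (None)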


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]
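
  # Example of the merging above (hypothetical values): if node1 reports the
  # variants ["wheezy", "jessie"] and node2 reports only ["jessie"], the OS
  # is advertised with variants == ["jessie"]; the same intersection is
  # applied to parameters and api_versions, so only data consistent across
  # all nodes is reported.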


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter
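
  # For instance, when none of "hidden", "blacklisted" or "valid" is among
  # the requested fields, the status part built above is
  #   [qlang.OP_AND,
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #    [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #    [qlang.OP_TRUE, "valid"]]
  # and it is AND-ed with the name filter if names were given.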

  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    all_nodes = self.cfg.GetNodeList()
    try:
      all_nodes.remove(self.op.node_name)
    except ValueError:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (all_nodes, all_nodes)
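
  # As an illustration: on a cluster with nodes ["node1", "node2", "node3"],
  # removing "node3" runs both the pre and the post hooks on ["node1",
  # "node2"] only, so a failure of the node being removed cannot block its
  # own removal.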

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % instance_name,
                                   errors.ECODE_INVAL)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
      "Not owning BGL"

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_REMOVE,
                                              node.name, None)
      result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)


class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedNodes(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if self.do_locking:
      # If any non-static field is requested we need to lock the nodes
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
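
    # Note (illustrative): query.NQ_LIVE is only part of requested_data when
    # "live" fields were asked for, typically values that need an RPC call
    # such as free memory or free disk. For purely configuration-backed
    # fields the code above therefore takes no node locks at all, even if
    # use_locking was requested.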

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)
    else:
      live_data = None

    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      inst_data = lu.cfg.GetAllInstancesInfo()

      for inst in inst_data.values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)
    else:
      node_to_primary = None
      node_to_secondary = None

    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())
    else:
      oob_support = None

    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()
    else:
      groups = {}

    return query.NodeQueryData([all_info[name] for name in nodenames],
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())


class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
                         self.op.output_fields, self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def DeclareLocks(self, level):
    self.nq.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)


class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.owned_locks(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = self.cfg.GetAllInstancesInfo()
    vol2inst = _MapInstanceDisksToNodes(ilist.values())

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = sorted(nresult.payload,
                         key=operator.itemgetter("dev"))

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol["dev"]
          elif field == "vg":
            val = vol["vg"]
          elif field == "name":
            val = vol["name"]
          elif field == "size":
            val = int(float(vol["size"]))
          elif field == "instance":
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
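
  # Purely illustrative result: with output_fields ["node", "phys", "name",
  # "size"], one row of the returned list could look like
  #   ["node1.example.com", "/dev/xenvg/disk0", "disk0", "10240"]
  # (every value is stringified above before being appended).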


class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {}

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    self.nodes = self.owned_locks(locking.LEVEL_NODE)

    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node or type as it's only known to the LU
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
      while extra in fields:
        fields.remove(extra)

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result

class _InstanceQuery(_QueryBase):
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    lu.needed_locks = {}
    lu.share_locks = _ShareAll()

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.do_grouplocks = (self.do_locking and
                          query.IQ_NODES in self.requested_data)

  def DeclareLocks(self, lu, level):
    if self.do_locking:
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
          set(group_uuid
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
      elif level == locking.LEVEL_NODE:
        lu._LockInstancesNodes() # pylint: disable=W0212
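
    # The group locks declared above are "optimistic": the set of groups is
    # computed from the configuration before all locks are held, so
    # _CheckGroupLocks below re-checks, via _CheckInstanceNodeGroups, that the
    # locked instances still live in the locked groups before any data is
    # gathered in _GetQueryData.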
4797

    
4798
  @staticmethod
4799
  def _CheckGroupLocks(lu):
4800
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4801
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4802

    
4803
    # Check if node groups for locked instances are still correct
4804
    for instance_name in owned_instances:
4805
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4806

    
4807
  def _GetQueryData(self, lu):
4808
    """Computes the list of instances and their attributes.
4809

4810
    """
4811
    if self.do_grouplocks:
4812
      self._CheckGroupLocks(lu)
4813

    
4814
    cluster = lu.cfg.GetClusterInfo()
4815
    all_info = lu.cfg.GetAllInstancesInfo()
4816

    
4817
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4818

    
4819
    instance_list = [all_info[name] for name in instance_names]
4820
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4821
                                        for inst in instance_list)))
4822
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4823
    bad_nodes = []
4824
    offline_nodes = []
4825
    wrongnode_inst = set()
4826

    
4827
    # Gather data as requested
4828
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4829
      live_data = {}
4830
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4831
      for name in nodes:
4832
        result = node_data[name]
4833
        if result.offline:
4834
          # offline nodes will be in both lists
4835
          assert result.fail_msg
4836
          offline_nodes.append(name)
4837
        if result.fail_msg:
4838
          bad_nodes.append(name)
4839
        elif result.payload:
4840
          for inst in result.payload:
4841
            if inst in all_info:
4842
              if all_info[inst].primary_node == name:
4843
                live_data.update(result.payload)
4844
              else:
4845
                wrongnode_inst.add(inst)
4846
            else:
4847
              # orphan instance; we don't list it here as we don't
4848
              # handle this case yet in the output of instance listing
4849
              logging.warning("Orphan instance '%s' found on node %s",
4850
                              inst, name)
4851
        # else no instance is alive
4852
    else:
4853
      live_data = {}
4854

    
4855
    if query.IQ_DISKUSAGE in self.requested_data:
4856
      disk_usage = dict((inst.name,
4857
                         _ComputeDiskSize(inst.disk_template,
4858
                                          [{constants.IDISK_SIZE: disk.size}
4859
                                           for disk in inst.disks]))
4860
                        for inst in instance_list)
4861
    else:
4862
      disk_usage = None
4863

    
4864
    if query.IQ_CONSOLE in self.requested_data:
4865
      consinfo = {}
4866
      for inst in instance_list:
4867
        if inst.name in live_data:
4868
          # Instance is running
4869
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4870
        else:
4871
          consinfo[inst.name] = None
4872
      assert set(consinfo.keys()) == set(instance_names)
4873
    else:
4874
      consinfo = None
4875

    
4876
    if query.IQ_NODES in self.requested_data:
4877
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4878
                                            instance_list)))
4879
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4880
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4881
                    for uuid in set(map(operator.attrgetter("group"),
4882
                                        nodes.values())))
4883
    else:
4884
      nodes = None
4885
      groups = None
4886

    
4887
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4888
                                   disk_usage, offline_nodes, bad_nodes,
4889
                                   live_data, wrongnode_inst, consinfo,
4890
                                   nodes, groups)
4891

    
4892

    
4893
class LUQuery(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    qcls = _GetQueryImplementation(self.op.what)

    self.impl = qcls(self.op.qfilter, self.op.fields, self.op.use_locking)

  def ExpandNames(self):
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    return self.impl.NewStyleQuery(self)


class LUQueryFields(NoHooksLU):
  """Query for resources/items of a certain kind.

  """
  # pylint: disable=W0142
  REQ_BGL = False

  def CheckArguments(self):
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)


class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Modifies a storage volume on the given node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))

class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) match the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using an IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
      if self.op.readd and node == existing_node_name:
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
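
    # Terminology used in the checks above (illustrative): a node is
    # "single-homed" when its secondary IP equals its primary IP (one network
    # only) and "dual-homed" when a separate secondary IP is configured for
    # the replication network; the new node must match the master's setup,
    # e.g. a dual-homed master cannot accept a single-homed node.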
5150

    
5151
  def Exec(self, feedback_fn):
5152
    """Adds the new node to the cluster.
5153

5154
    """
5155
    new_node = self.new_node
5156
    node = new_node.name
5157

    
5158
    assert locking.BGL in self.owned_locks(locking.LEVEL_CLUSTER), \
5159
      "Not owning BGL"
5160

    
5161
    # We adding a new node so we assume it's powered
5162
    new_node.powered = True
5163

    
5164
    # for re-adds, reset the offline/drained/master-candidate flags;
5165
    # we need to reset here, otherwise offline would prevent RPC calls
5166
    # later in the procedure; this also means that if the re-add
5167
    # fails, we are left with a non-offlined, broken node
5168
    if self.op.readd:
5169
      new_node.drained = new_node.offline = False # pylint: disable=W0201
5170
      self.LogInfo("Readding a node, the offline/drained flags were reset")
5171
      # if we demote the node, we do cleanup later in the procedure
5172
      new_node.master_candidate = self.master_candidate
5173
      if self.changed_primary_ip:
5174
        new_node.primary_ip = self.op.primary_ip
5175

    
5176
    # copy the master/vm_capable flags
5177
    for attr in self._NFLAGS:
5178
      setattr(new_node, attr, getattr(self.op, attr))
5179

    
5180
    # notify the user about any possible mc promotion
5181
    if new_node.master_candidate:
5182
      self.LogInfo("Node will be a master candidate")
5183

    
5184
    if self.op.ndparams:
5185
      new_node.ndparams = self.op.ndparams
5186
    else:
5187
      new_node.ndparams = {}
5188

    
5189
    # check connectivity
5190
    result = self.rpc.call_version([node])[node]
5191
    result.Raise("Can't get version information from node %s" % node)
5192
    if constants.PROTOCOL_VERSION == result.payload:
5193
      logging.info("Communication to node %s fine, sw version %s match",
5194
                   node, result.payload)
5195
    else:
5196
      raise errors.OpExecError("Version mismatch master version %s,"
5197
                               " node version %s" %
5198
                               (constants.PROTOCOL_VERSION, result.payload))
5199

    
5200
    # Add node to our /etc/hosts, and add key to known_hosts
5201
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5202
      master_node = self.cfg.GetMasterNode()
5203
      result = self.rpc.call_etc_hosts_modify(master_node,
5204
                                              constants.ETC_HOSTS_ADD,
5205
                                              self.hostname.name,
5206
                                              self.hostname.ip)
5207
      result.Raise("Can't update hosts file with new host data")
5208

    
5209
    if new_node.secondary_ip != new_node.primary_ip:
5210
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5211
                               False)
5212

    
5213
    node_verify_list = [self.cfg.GetMasterNode()]
5214
    node_verify_param = {
5215
      constants.NV_NODELIST: ([node], {}),
5216
      # TODO: do a node-net-test as well?
5217
    }
5218

    
5219
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5220
                                       self.cfg.GetClusterName())
5221
    for verifier in node_verify_list:
5222
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5223
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5224
      if nl_payload:
5225
        for failed in nl_payload:
5226
          feedback_fn("ssh/hostname verification failed"
5227
                      " (checking from %s): %s" %
5228
                      (verifier, nl_payload[failed]))
5229
        raise errors.OpExecError("ssh/hostname verification failed")
5230

    
5231
    if self.op.readd:
5232
      _RedistributeAncillaryFiles(self)
5233
      self.context.ReaddNode(new_node)
5234
      # make sure we redistribute the config
5235
      self.cfg.Update(new_node, feedback_fn)
5236
      # and make sure the new node will not have old files around
5237
      if not new_node.master_candidate:
5238
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5239
        msg = result.fail_msg
5240
        if msg:
5241
          self.LogWarning("Node failed to demote itself from master"
5242
                          " candidate status: %s" % msg)
5243
    else:
5244
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5245
                                  additional_vm=self.op.vm_capable)
5246
      self.context.AddNode(new_node, self.proc.GetECId())
5247

    
5248

    
5249
class LUNodeSetParams(LogicalUnit):
5250
  """Modifies the parameters of a node.
5251

5252
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5253
      to the node role (as _ROLE_*)
5254
  @cvar _R2F: a dictionary from node role to tuples of flags
5255
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5256

5257
  """
5258
  HPATH = "node-modify"
5259
  HTYPE = constants.HTYPE_NODE
5260
  REQ_BGL = False
5261
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5262
  _F2R = {
5263
    (True, False, False): _ROLE_CANDIDATE,
5264
    (False, True, False): _ROLE_DRAINED,
5265
    (False, False, True): _ROLE_OFFLINE,
5266
    (False, False, False): _ROLE_REGULAR,
5267
    }
5268
  _R2F = dict((v, k) for k, v in _F2R.items())
5269
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def _InstanceFilter(self, instance):
    """Filter for getting affected instances.

    """
    return (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes)

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        frozenset(self.cfg.GetInstancesInfoByFilter(self._InstanceFilter))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if self.lock_instances:
      affected_instances = \
        self.cfg.GetInstancesInfoByFilter(self._InstanceFilter)

      # Verify instance locks
      owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
      wanted_instances = frozenset(affected_instances.keys())
      if wanted_instances - owned_instances:
        raise errors.OpPrereqError("Instances affected by changing node %s's"
                                   " secondary IP address have changed since"
                                   " locks were acquired, wanted '%s', have"
                                   " '%s'; retry the operation" %
                                   (self.op.node_name,
                                    utils.CommaJoin(wanted_instances),
                                    utils.CommaJoin(owned_instances)),
                                   errors.ECODE_STATE)
    else:
      affected_instances = None

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      # TODO: Use standard RPC runner, but make sure it works when the node is
      # still marked offline
      result = rpc.BootstrapRunner().call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      assert not (frozenset(affected_instances) -
                  self.owned_locks(locking.LEVEL_INSTANCE))

      if node.offline:
        if affected_instances:
          raise errors.OpPrereqError("Cannot change secondary IP address:"
                                     " offline node has instances (%s)"
                                     " configured to use it" %
                                     utils.CommaJoin(affected_instances.keys()))
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in affected_instances.values():
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params
5613

    
5614
    # Convert ip_family to ip_version
5615
    primary_ip_version = constants.IP4_VERSION
5616
    if cluster.primary_ip_family == netutils.IP6Address.family:
5617
      primary_ip_version = constants.IP6_VERSION
5618

    
5619
    result = {
5620
      "software_version": constants.RELEASE_VERSION,
5621
      "protocol_version": constants.PROTOCOL_VERSION,
5622
      "config_version": constants.CONFIG_VERSION,
5623
      "os_api_version": max(constants.OS_API_VERSIONS),
5624
      "export_version": constants.EXPORT_VERSION,
5625
      "architecture": (platform.architecture()[0], platform.machine()),
5626
      "name": cluster.cluster_name,
5627
      "master": cluster.master_node,
5628
      "default_hypervisor": cluster.enabled_hypervisors[0],
5629
      "enabled_hypervisors": cluster.enabled_hypervisors,
5630
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5631
                        for hypervisor_name in cluster.enabled_hypervisors]),
5632
      "os_hvp": os_hvp,
5633
      "beparams": cluster.beparams,
5634
      "osparams": cluster.osparams,
5635
      "nicparams": cluster.nicparams,
5636
      "ndparams": cluster.ndparams,
5637
      "candidate_pool_size": cluster.candidate_pool_size,
5638
      "master_netdev": cluster.master_netdev,
5639
      "master_netmask": cluster.master_netmask,
5640
      "use_external_mip_script": cluster.use_external_mip_script,
5641
      "volume_group_name": cluster.volume_group_name,
5642
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5643
      "file_storage_dir": cluster.file_storage_dir,
5644
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5645
      "maintain_node_health": cluster.maintain_node_health,
5646
      "ctime": cluster.ctime,
5647
      "mtime": cluster.mtime,
5648
      "uuid": cluster.uuid,
5649
      "tags": list(cluster.GetTags()),
5650
      "uid_pool": cluster.uid_pool,
5651
      "default_iallocator": cluster.default_iallocator,
5652
      "reserved_lvs": cluster.reserved_lvs,
5653
      "primary_ip_version": primary_ip_version,
5654
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5655
      "hidden_os": cluster.hidden_os,
5656
      "blacklisted_os": cluster.blacklisted_os,
5657
      }
5658

    
5659
    return result
5660

    
5661

    
5662
class LUClusterConfigQuery(NoHooksLU):
5663
  """Return configuration values.
5664

5665
  """
5666
  REQ_BGL = False
5667
  _FIELDS_DYNAMIC = utils.FieldSet()
5668
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5669
                                  "watcher_pause", "volume_group_name")
5670

    
5671
  def CheckArguments(self):
5672
    _CheckOutputFields(static=self._FIELDS_STATIC,
5673
                       dynamic=self._FIELDS_DYNAMIC,
5674
                       selected=self.op.output_fields)
5675

    
5676
  def ExpandNames(self):
5677
    self.needed_locks = {}
5678

    
5679
  def Exec(self, feedback_fn):
5680
    """Dump a representation of the cluster config to the standard output.
5681

5682
    """
5683
    values = []
5684
    for field in self.op.output_fields:
5685
      if field == "cluster_name":
5686
        entry = self.cfg.GetClusterName()
5687
      elif field == "master_node":
5688
        entry = self.cfg.GetMasterNode()
5689
      elif field == "drain_flag":
5690
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5691
      elif field == "watcher_pause":
5692
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5693
      elif field == "volume_group_name":
5694
        entry = self.cfg.GetVGName()
5695
      else:
5696
        raise errors.ParameterError(field)
5697
      values.append(entry)
5698
    return values
5699

    
5700

    
5701
class LUInstanceActivateDisks(NoHooksLU):
5702
  """Bring up an instance's disks.
5703

5704
  """
5705
  REQ_BGL = False
5706

    
5707
  def ExpandNames(self):
5708
    self._ExpandAndLockInstance()
5709
    self.needed_locks[locking.LEVEL_NODE] = []
5710
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5711

    
5712
  def DeclareLocks(self, level):
5713
    if level == locking.LEVEL_NODE:
5714
      self._LockInstancesNodes()
5715

    
5716
  def CheckPrereq(self):
5717
    """Check prerequisites.
5718

5719
    This checks that the instance is in the cluster.
5720

5721
    """
5722
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5723
    assert self.instance is not None, \
5724
      "Cannot retrieve locked instance %s" % self.op.instance_name
5725
    _CheckNodeOnline(self, self.instance.primary_node)
5726

    
5727
  def Exec(self, feedback_fn):
5728
    """Activate the disks.
5729

5730
    """
5731
    disks_ok, disks_info = \
5732
              _AssembleInstanceDisks(self, self.instance,
5733
                                     ignore_size=self.op.ignore_size)
5734
    if not disks_ok:
5735
      raise errors.OpExecError("Cannot activate block devices")
5736

    
5737
    return disks_info
5738

    
5739

    
5740
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5741
                           ignore_size=False):
5742
  """Prepare the block devices for an instance.
5743

5744
  This sets up the block devices on all nodes.
5745

5746
  @type lu: L{LogicalUnit}
5747
  @param lu: the logical unit on whose behalf we execute
5748
  @type instance: L{objects.Instance}
5749
  @param instance: the instance for whose disks we assemble
5750
  @type disks: list of L{objects.Disk} or None
5751
  @param disks: which disks to assemble (or all, if None)
5752
  @type ignore_secondaries: boolean
5753
  @param ignore_secondaries: if true, errors on secondary nodes
5754
      won't result in an error return from the function
5755
  @type ignore_size: boolean
5756
  @param ignore_size: if true, the current known size of the disk
5757
      will not be used during the disk activation, useful for cases
5758
      when the size is wrong
5759
  @return: False if the operation failed, otherwise a list of
5760
      (host, instance_visible_name, node_visible_name)
5761
      with the mapping from node devices to instance devices
5762

5763
  """
5764
  device_info = []
5765
  disks_ok = True
5766
  iname = instance.name
5767
  disks = _ExpandCheckDisks(instance, disks)

  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
5779
  for idx, inst_disk in enumerate(disks):
5780
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5781
      if ignore_size:
5782
        node_disk = node_disk.Copy()
5783
        node_disk.UnsetSize()
5784
      lu.cfg.SetDiskID(node_disk, node)
5785
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5786
      msg = result.fail_msg
5787
      if msg:
5788
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5789
                           " (is_primary=False, pass=1): %s",
5790
                           inst_disk.iv_name, node, msg)
5791
        if not ignore_secondaries:
5792
          disks_ok = False
5793

    
5794
  # FIXME: race condition on drbd migration to primary
5795

    
5796
  # 2nd pass, do only the primary node
5797
  for idx, inst_disk in enumerate(disks):
5798
    dev_path = None
5799

    
5800
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5801
      if node != instance.primary_node:
5802
        continue
5803
      if ignore_size:
5804
        node_disk = node_disk.Copy()
5805
        node_disk.UnsetSize()
5806
      lu.cfg.SetDiskID(node_disk, node)
5807
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5808
      msg = result.fail_msg
5809
      if msg:
5810
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5811
                           " (is_primary=True, pass=2): %s",
5812
                           inst_disk.iv_name, node, msg)
5813
        disks_ok = False
5814
      else:
5815
        dev_path = result.payload
5816

    
5817
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5818

    
5819
  # leave the disks configured for the primary node
5820
  # this is a workaround that would be fixed better by
5821
  # improving the logical/physical id handling
5822
  for disk in disks:
5823
    lu.cfg.SetDiskID(disk, instance.primary_node)
5824

    
5825
  return disks_ok, device_info
5826

    
5827

    
5828
def _StartInstanceDisks(lu, instance, force):
5829
  """Start the disks of an instance.
5830

5831
  """
5832
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5833
                                           ignore_secondaries=force)
5834
  if not disks_ok:
5835
    _ShutdownInstanceDisks(lu, instance)
5836
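    # callers that do not expose a force option pass None here, so the
    # hint about --force is only shown when force was explicitly given
    # as False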
    if force is not None and not force:
5837
      lu.proc.LogWarning("", hint="If the message above refers to a"
5838
                         " secondary node,"
5839
                         " you can retry the operation using '--force'.")
5840
    raise errors.OpExecError("Disk consistency error")
5841

    
5842

    
5843
class LUInstanceDeactivateDisks(NoHooksLU):
5844
  """Shutdown an instance's disks.
5845

5846
  """
5847
  REQ_BGL = False
5848

    
5849
  def ExpandNames(self):
5850
    self._ExpandAndLockInstance()
5851
    self.needed_locks[locking.LEVEL_NODE] = []
5852
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5853

    
5854
  def DeclareLocks(self, level):
5855
    if level == locking.LEVEL_NODE:
5856
      self._LockInstancesNodes()
5857

    
5858
  def CheckPrereq(self):
5859
    """Check prerequisites.
5860

5861
    This checks that the instance is in the cluster.
5862

5863
    """
5864
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5865
    assert self.instance is not None, \
5866
      "Cannot retrieve locked instance %s" % self.op.instance_name
5867

    
5868
  def Exec(self, feedback_fn):
5869
    """Deactivate the disks
5870

5871
    """
5872
    instance = self.instance
5873
    if self.op.force:
5874
      _ShutdownInstanceDisks(self, instance)
5875
    else:
5876
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are only ignored if C{ignore_primary} is
  true; errors on secondary nodes are ignored if the node in question
  is offline.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
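        # an error counts against the result if it happened on the primary
        # node (and we were not told to ignore the primary), or on a
        # secondary node that is not marked offline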
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5935
  """Checks if a node has enough free memory.
5936

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

5942
  @type lu: C{LogicalUnit}
5943
  @param lu: a logical unit from which we get configuration data
5944
  @type node: C{str}
5945
  @param node: the node to check
5946
  @type reason: C{str}
5947
  @param reason: string to use in the error message
5948
  @type requested: C{int}
5949
  @param requested: the amount of memory in MiB to check for
5950
  @type hypervisor_name: C{str}
5951
  @param hypervisor_name: the hypervisor to ask for memory stats
5952
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5953
      we cannot check the node
5954

5955
  """
5956
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5957
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5958
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5959
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5960
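  # the node info payload reports free memory as an integer under
  # "memory_free"; anything else means the node's data cannot be used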
  if not isinstance(free_mem, int):
5961
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5962
                               " was '%s'" % (node, free_mem),
5963
                               errors.ECODE_ENVIRON)
5964
  if requested > free_mem:
5965
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5966
                               " needed %s MiB, available %s MiB" %
5967
                               (node, reason, requested, free_mem),
5968
                               errors.ECODE_NORES)
5969

    
5970

    
5971
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

5979
  @type lu: C{LogicalUnit}
5980
  @param lu: a logical unit from which we get configuration data
5981
  @type nodenames: C{list}
5982
  @param nodenames: the list of node names to check
5983
  @type req_sizes: C{dict}
5984
  @param req_sizes: the hash of vg and corresponding amount of disk in
5985
      MiB to check for
5986
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5987
      or we cannot check the node
5988

5989
  """
5990
  for vg, req_size in req_sizes.items():
5991
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5992

    
5993

    
5994
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5995
  """Checks if nodes have enough free disk space in the specified VG.
5996

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

6002
  @type lu: C{LogicalUnit}
6003
  @param lu: a logical unit from which we get configuration data
6004
  @type nodenames: C{list}
6005
  @param nodenames: the list of node names to check
6006
  @type vg: C{str}
6007
  @param vg: the volume group to check
6008
  @type requested: C{int}
6009
  @param requested: the amount of disk in MiB to check for
6010
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
6011
      or we cannot check the node
6012

6013
  """
6014
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
6015
  for node in nodenames:
6016
    info = nodeinfo[node]
6017
    info.Raise("Cannot get current information from node %s" % node,
6018
               prereq=True, ecode=errors.ECODE_ENVIRON)
6019
    vg_free = info.payload.get("vg_free", None)
6020
    if not isinstance(vg_free, int):
6021
      raise errors.OpPrereqError("Can't compute free disk space on node"
6022
                                 " %s for vg %s, result was '%s'" %
6023
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
6024
    if requested > vg_free:
6025
      raise errors.OpPrereqError("Not enough disk space on target node %s"
6026
                                 " vg %s: required %d MiB, available %d MiB" %
6027
                                 (node, vg, requested, vg_free),
6028
                                 errors.ECODE_NORES)
6029

    
6030

    
6031
def _CheckNodesPhysicalCPUs(lu, nodenames, requested, hypervisor_name):
6032
  """Checks if nodes have enough physical CPUs
6033

6034
  This function checks if all given nodes have the needed number of
6035
  physical CPUs. In case any node has less CPUs or we cannot get the
6036
  information from the node, this function raises an OpPrereqError
6037
  exception.
6038

6039
  @type lu: C{LogicalUnit}
6040
  @param lu: a logical unit from which we get configuration data
6041
  @type nodenames: C{list}
6042
  @param nodenames: the list of node names to check
6043
  @type requested: C{int}
6044
  @param requested: the minimum acceptable number of physical CPUs
6045
  @raise errors.OpPrereqError: if the node doesn't have enough CPUs,
6046
      or we cannot check the node
6047

6048
  """
6049
  nodeinfo = lu.rpc.call_node_info(nodenames, None, hypervisor_name)
6050
  for node in nodenames:
6051
    info = nodeinfo[node]
6052
    info.Raise("Cannot get current information from node %s" % node,
6053
               prereq=True, ecode=errors.ECODE_ENVIRON)
6054
    num_cpus = info.payload.get("cpu_total", None)
6055
    if not isinstance(num_cpus, int):
6056
      raise errors.OpPrereqError("Can't compute the number of physical CPUs"
6057
                                 " on node %s, result was '%s'" %
6058
                                 (node, num_cpus), errors.ECODE_ENVIRON)
6059
    if requested > num_cpus:
6060
      raise errors.OpPrereqError("Node %s has %s physical CPUs, but %s are "
6061
                                 "required" % (node, num_cpus, requested),
6062
                                 errors.ECODE_NORES)
6063

    
6064

    
6065
class LUInstanceStartup(LogicalUnit):
6066
  """Starts an instance.
6067

6068
  """
6069
  HPATH = "instance-start"
6070
  HTYPE = constants.HTYPE_INSTANCE
6071
  REQ_BGL = False
6072

    
6073
  def CheckArguments(self):
6074
    # extra beparams
6075
    if self.op.beparams:
6076
      # fill the beparams dict
6077
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6078

    
6079
  def ExpandNames(self):
6080
    self._ExpandAndLockInstance()
6081

    
6082
  def BuildHooksEnv(self):
6083
    """Build hooks env.
6084

6085
    This runs on master, primary and secondary nodes of the instance.
6086

6087
    """
6088
    env = {
6089
      "FORCE": self.op.force,
6090
      }
6091

    
6092
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6093

    
6094
    return env
6095

    
6096
  def BuildHooksNodes(self):
6097
    """Build hooks nodes.
6098

6099
    """
6100
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6101
    return (nl, nl)
6102

    
6103
  def CheckPrereq(self):
6104
    """Check prerequisites.
6105

6106
    This checks that the instance is in the cluster.
6107

6108
    """
6109
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6110
    assert self.instance is not None, \
6111
      "Cannot retrieve locked instance %s" % self.op.instance_name
6112

    
6113
    # extra hvparams
6114
    if self.op.hvparams:
6115
      # check hypervisor parameter syntax (locally)
6116
      cluster = self.cfg.GetClusterInfo()
6117
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6118
      filled_hvp = cluster.FillHV(instance)
6119
      filled_hvp.update(self.op.hvparams)
6120
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
6121
      hv_type.CheckParameterSyntax(filled_hvp)
6122
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
6123

    
6124
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
6125

    
6126
    if self.primary_offline and self.op.ignore_offline_nodes:
6127
      self.proc.LogWarning("Ignoring offline primary node")
6128

    
6129
      if self.op.hvparams or self.op.beparams:
6130
        self.proc.LogWarning("Overridden parameters are ignored")
6131
    else:
6132
      _CheckNodeOnline(self, instance.primary_node)
6133

    
6134
      bep = self.cfg.GetClusterInfo().FillBE(instance)
6135

    
6136
      # check that the instance's bridges exist
6137
      _CheckInstanceBridgesExist(self, instance)
6138

    
6139
      remote_info = self.rpc.call_instance_info(instance.primary_node,
6140
                                                instance.name,
6141
                                                instance.hypervisor)
6142
      remote_info.Raise("Error checking node %s" % instance.primary_node,
6143
                        prereq=True, ecode=errors.ECODE_ENVIRON)
6144
      if not remote_info.payload: # not running already
6145
        _CheckNodeFreeMemory(self, instance.primary_node,
6146
                             "starting instance %s" % instance.name,
6147
                             bep[constants.BE_MEMORY], instance.hypervisor)
6148

    
6149
  def Exec(self, feedback_fn):
6150
    """Start the instance.
6151

6152
    """
6153
    instance = self.instance
6154
    force = self.op.force
6155

    
6156
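    # unless the caller asked us not to remember the change, record the
    # instance as running in the cluster configuration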
    if not self.op.no_remember:
6157
      self.cfg.MarkInstanceUp(instance.name)
6158

    
6159
    if self.primary_offline:
6160
      assert self.op.ignore_offline_nodes
6161
      self.proc.LogInfo("Primary node offline, marked instance as started")
6162
    else:
6163
      node_current = instance.primary_node
6164

    
6165
      _StartInstanceDisks(self, instance, force)
6166

    
6167
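      # start the instance, passing along any hvparam/beparam overrides
      # that were given in the opcode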
      result = \
6168
        self.rpc.call_instance_start(node_current,
6169
                                     (instance, self.op.hvparams,
6170
                                      self.op.beparams),
6171
                                     self.op.startup_paused)
6172
      msg = result.fail_msg
6173
      if msg:
6174
        _ShutdownInstanceDisks(self, instance)
6175
        raise errors.OpExecError("Could not start instance: %s" % msg)
6176

    
6177

    
6178
class LUInstanceReboot(LogicalUnit):
6179
  """Reboot an instance.
6180

6181
  """
6182
  HPATH = "instance-reboot"
6183
  HTYPE = constants.HTYPE_INSTANCE
6184
  REQ_BGL = False
6185

    
6186
  def ExpandNames(self):
6187
    self._ExpandAndLockInstance()
6188

    
6189
  def BuildHooksEnv(self):
6190
    """Build hooks env.
6191

6192
    This runs on master, primary and secondary nodes of the instance.
6193

6194
    """
6195
    env = {
6196
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
6197
      "REBOOT_TYPE": self.op.reboot_type,
6198
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6199
      }
6200

    
6201
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6202

    
6203
    return env
6204

    
6205
  def BuildHooksNodes(self):
6206
    """Build hooks nodes.
6207

6208
    """
6209
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6210
    return (nl, nl)
6211

    
6212
  def CheckPrereq(self):
6213
    """Check prerequisites.
6214

6215
    This checks that the instance is in the cluster.
6216

6217
    """
6218
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6219
    assert self.instance is not None, \
6220
      "Cannot retrieve locked instance %s" % self.op.instance_name
6221

    
6222
    _CheckNodeOnline(self, instance.primary_node)
6223

    
6224
    # check that the instance's bridges exist
6225
    _CheckInstanceBridgesExist(self, instance)
6226

    
6227
  def Exec(self, feedback_fn):
6228
    """Reboot the instance.
6229

6230
    """
6231
    instance = self.instance
6232
    ignore_secondaries = self.op.ignore_secondaries
6233
    reboot_type = self.op.reboot_type
6234

    
6235
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6236
                                              instance.name,
6237
                                              instance.hypervisor)
6238
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6239
    instance_running = bool(remote_info.payload)
6240

    
6241
    node_current = instance.primary_node
6242

    
6243
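    # soft and hard reboots of a running instance are delegated to the
    # hypervisor; a full reboot, or rebooting an instance that is not
    # running, falls through to the explicit stop/start path below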
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6244
                                            constants.INSTANCE_REBOOT_HARD]:
6245
      for disk in instance.disks:
6246
        self.cfg.SetDiskID(disk, node_current)
6247
      result = self.rpc.call_instance_reboot(node_current, instance,
6248
                                             reboot_type,
6249
                                             self.op.shutdown_timeout)
6250
      result.Raise("Could not reboot instance")
6251
    else:
6252
      if instance_running:
6253
        result = self.rpc.call_instance_shutdown(node_current, instance,
6254
                                                 self.op.shutdown_timeout)
6255
        result.Raise("Could not shutdown instance for full reboot")
6256
        _ShutdownInstanceDisks(self, instance)
6257
      else:
6258
        self.LogInfo("Instance %s was already stopped, starting now",
6259
                     instance.name)
6260
      _StartInstanceDisks(self, instance, ignore_secondaries)
6261
      result = self.rpc.call_instance_start(node_current,
6262
                                            (instance, None, None), False)
6263
      msg = result.fail_msg
6264
      if msg:
6265
        _ShutdownInstanceDisks(self, instance)
6266
        raise errors.OpExecError("Could not start instance for"
6267
                                 " full reboot: %s" % msg)
6268

    
6269
    self.cfg.MarkInstanceUp(instance.name)
6270

    
6271

    
6272
class LUInstanceShutdown(LogicalUnit):
6273
  """Shutdown an instance.
6274

6275
  """
6276
  HPATH = "instance-stop"
6277
  HTYPE = constants.HTYPE_INSTANCE
6278
  REQ_BGL = False
6279

    
6280
  def ExpandNames(self):
6281
    self._ExpandAndLockInstance()
6282

    
6283
  def BuildHooksEnv(self):
6284
    """Build hooks env.
6285

6286
    This runs on master, primary and secondary nodes of the instance.
6287

6288
    """
6289
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6290
    env["TIMEOUT"] = self.op.timeout
6291
    return env
6292

    
6293
  def BuildHooksNodes(self):
6294
    """Build hooks nodes.
6295

6296
    """
6297
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6298
    return (nl, nl)
6299

    
6300
  def CheckPrereq(self):
6301
    """Check prerequisites.
6302

6303
    This checks that the instance is in the cluster.
6304

6305
    """
6306
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6307
    assert self.instance is not None, \
6308
      "Cannot retrieve locked instance %s" % self.op.instance_name
6309

    
6310
    self.primary_offline = \
6311
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6312

    
6313
    if self.primary_offline and self.op.ignore_offline_nodes:
6314
      self.proc.LogWarning("Ignoring offline primary node")
6315
    else:
6316
      _CheckNodeOnline(self, self.instance.primary_node)
6317

    
6318
  def Exec(self, feedback_fn):
6319
    """Shutdown the instance.
6320

6321
    """
6322
    instance = self.instance
6323
    node_current = instance.primary_node
6324
    timeout = self.op.timeout
6325

    
6326
    if not self.op.no_remember:
6327
      self.cfg.MarkInstanceDown(instance.name)
6328

    
6329
    if self.primary_offline:
6330
      assert self.op.ignore_offline_nodes
6331
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6332
    else:
6333
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6334
      msg = result.fail_msg
6335
      if msg:
6336
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6337

    
6338
      _ShutdownInstanceDisks(self, instance)
6339

    
6340

    
6341
class LUInstanceReinstall(LogicalUnit):
6342
  """Reinstall an instance.
6343

6344
  """
6345
  HPATH = "instance-reinstall"
6346
  HTYPE = constants.HTYPE_INSTANCE
6347
  REQ_BGL = False
6348

    
6349
  def ExpandNames(self):
6350
    self._ExpandAndLockInstance()
6351

    
6352
  def BuildHooksEnv(self):
6353
    """Build hooks env.
6354

6355
    This runs on master, primary and secondary nodes of the instance.
6356

6357
    """
6358
    return _BuildInstanceHookEnvByObject(self, self.instance)
6359

    
6360
  def BuildHooksNodes(self):
6361
    """Build hooks nodes.
6362

6363
    """
6364
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6365
    return (nl, nl)
6366

    
6367
  def CheckPrereq(self):
6368
    """Check prerequisites.
6369

6370
    This checks that the instance is in the cluster and is not running.
6371

6372
    """
6373
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6374
    assert instance is not None, \
6375
      "Cannot retrieve locked instance %s" % self.op.instance_name
6376
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6377
                     " offline, cannot reinstall")
6378
    for node in instance.secondary_nodes:
6379
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6380
                       " cannot reinstall")
6381

    
6382
    if instance.disk_template == constants.DT_DISKLESS:
6383
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6384
                                 self.op.instance_name,
6385
                                 errors.ECODE_INVAL)
6386
    _CheckInstanceDown(self, instance, "cannot reinstall")
6387

    
6388
    if self.op.os_type is not None:
6389
      # OS verification
6390
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6391
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6392
      instance_os = self.op.os_type
6393
    else:
6394
      instance_os = instance.os
6395

    
6396
    nodelist = list(instance.all_nodes)
6397

    
6398
    if self.op.osparams:
6399
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6400
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6401
      self.os_inst = i_osdict # the new dict (without defaults)
6402
    else:
6403
      self.os_inst = None
6404

    
6405
    self.instance = instance
6406

    
6407
  def Exec(self, feedback_fn):
6408
    """Reinstall the instance.
6409

6410
    """
6411
    inst = self.instance
6412

    
6413
    if self.op.os_type is not None:
6414
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6415
      inst.os = self.op.os_type
6416
      # Write to configuration
6417
      self.cfg.Update(inst, feedback_fn)
6418

    
6419
    _StartInstanceDisks(self, inst, None)
6420
    try:
6421
      feedback_fn("Running the instance OS create scripts...")
6422
      # FIXME: pass debug option from opcode to backend
6423
      result = self.rpc.call_instance_os_add(inst.primary_node,
6424
                                             (inst, self.os_inst), True,
6425
                                             self.op.debug_level)
6426
      result.Raise("Could not install OS for instance %s on node %s" %
6427
                   (inst.name, inst.primary_node))
6428
    finally:
6429
      _ShutdownInstanceDisks(self, inst)
6430

    
6431

    
6432
class LUInstanceRecreateDisks(LogicalUnit):
6433
  """Recreate an instance's missing disks.
6434

6435
  """
6436
  HPATH = "instance-recreate-disks"
6437
  HTYPE = constants.HTYPE_INSTANCE
6438
  REQ_BGL = False
6439

    
6440
  def CheckArguments(self):
6441
    # normalise the disk list
6442
    self.op.disks = sorted(frozenset(self.op.disks))
6443

    
6444
  def ExpandNames(self):
6445
    self._ExpandAndLockInstance()
6446
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6447
    if self.op.nodes:
6448
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6449
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6450
    else:
6451
      self.needed_locks[locking.LEVEL_NODE] = []
6452

    
6453
  def DeclareLocks(self, level):
6454
    if level == locking.LEVEL_NODE:
6455
      # if we replace the nodes, we only need to lock the old primary,
6456
      # otherwise we need to lock all nodes for disk re-creation
6457
      primary_only = bool(self.op.nodes)
6458
      self._LockInstancesNodes(primary_only=primary_only)
6459
    elif level == locking.LEVEL_NODE_RES:
6460
      # Copy node locks
6461
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6462
        self.needed_locks[locking.LEVEL_NODE][:]
6463

    
6464
  def BuildHooksEnv(self):
6465
    """Build hooks env.
6466

6467
    This runs on master, primary and secondary nodes of the instance.
6468

6469
    """
6470
    return _BuildInstanceHookEnvByObject(self, self.instance)
6471

    
6472
  def BuildHooksNodes(self):
6473
    """Build hooks nodes.
6474

6475
    """
6476
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6477
    return (nl, nl)
6478

    
6479
  def CheckPrereq(self):
6480
    """Check prerequisites.
6481

6482
    This checks that the instance is in the cluster and is not running.
6483

6484
    """
6485
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6486
    assert instance is not None, \
6487
      "Cannot retrieve locked instance %s" % self.op.instance_name
6488
    if self.op.nodes:
6489
      if len(self.op.nodes) != len(instance.all_nodes):
6490
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6491
                                   " %d replacement nodes were specified" %
6492
                                   (instance.name, len(instance.all_nodes),
6493
                                    len(self.op.nodes)),
6494
                                   errors.ECODE_INVAL)
6495
      assert instance.disk_template != constants.DT_DRBD8 or \
6496
          len(self.op.nodes) == 2
6497
      assert instance.disk_template != constants.DT_PLAIN or \
6498
          len(self.op.nodes) == 1
6499
      primary_node = self.op.nodes[0]
6500
    else:
6501
      primary_node = instance.primary_node
6502
    _CheckNodeOnline(self, primary_node)
6503

    
6504
    if instance.disk_template == constants.DT_DISKLESS:
6505
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6506
                                 self.op.instance_name, errors.ECODE_INVAL)
6507
    # if we replace nodes *and* the old primary is offline, we don't
6508
    # check
6509
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE)
6510
    assert instance.primary_node in self.owned_locks(locking.LEVEL_NODE_RES)
6511
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6512
    if not (self.op.nodes and old_pnode.offline):
6513
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6514

    
6515
    if not self.op.disks:
6516
      self.op.disks = range(len(instance.disks))
6517
    else:
6518
      for idx in self.op.disks:
6519
        if idx >= len(instance.disks):
6520
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6521
                                     errors.ECODE_INVAL)
6522
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6523
      raise errors.OpPrereqError("Can't recreate disks partially and"
6524
                                 " change the nodes at the same time",
6525
                                 errors.ECODE_INVAL)
6526
    self.instance = instance
6527

    
6528
  def Exec(self, feedback_fn):
6529
    """Recreate the disks.
6530

6531
    """
6532
    instance = self.instance
6533

    
6534
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6535
            self.owned_locks(locking.LEVEL_NODE_RES))
6536

    
6537
    to_skip = []
6538
    mods = [] # keeps track of needed logical_id changes
6539

    
6540
    for idx, disk in enumerate(instance.disks):
6541
      if idx not in self.op.disks: # disk idx has not been passed in
6542
        to_skip.append(idx)
6543
        continue
6544
      # update secondaries for disks, if needed
6545
      if self.op.nodes:
6546
        if disk.dev_type == constants.LD_DRBD8:
6547
          # need to update the nodes and minors
6548
          assert len(self.op.nodes) == 2
6549
          assert len(disk.logical_id) == 6 # otherwise disk internals
6550
                                           # have changed
6551
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6552
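          # keep the old port and secret, but allocate fresh minors for
          # the new node pair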
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6553
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6554
                    new_minors[0], new_minors[1], old_secret)
6555
          assert len(disk.logical_id) == len(new_id)
6556
          mods.append((idx, new_id))
6557

    
6558
    # now that we have passed all asserts above, we can apply the mods
6559
    # in a single run (to avoid partial changes)
6560
    for idx, new_id in mods:
6561
      instance.disks[idx].logical_id = new_id
6562

    
6563
    # change primary node, if needed
6564
    if self.op.nodes:
6565
      instance.primary_node = self.op.nodes[0]
6566
      self.LogWarning("Changing the instance's nodes, you will have to"
6567
                      " remove any disks left on the older nodes manually")
6568

    
6569
    if self.op.nodes:
6570
      self.cfg.Update(instance, feedback_fn)
6571

    
6572
    _CreateDisks(self, instance, to_skip=to_skip)
6573

    
6574

    
6575
class LUInstanceRename(LogicalUnit):
6576
  """Rename an instance.
6577

6578
  """
6579
  HPATH = "instance-rename"
6580
  HTYPE = constants.HTYPE_INSTANCE
6581

    
6582
  def CheckArguments(self):
6583
    """Check arguments.
6584

6585
    """
6586
    if self.op.ip_check and not self.op.name_check:
6587
      # TODO: make the ip check more flexible and not depend on the name check
6588
      raise errors.OpPrereqError("IP address check requires a name check",
6589
                                 errors.ECODE_INVAL)
6590

    
6591
  def BuildHooksEnv(self):
6592
    """Build hooks env.
6593

6594
    This runs on master, primary and secondary nodes of the instance.
6595

6596
    """
6597
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6598
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6599
    return env
6600

    
6601
  def BuildHooksNodes(self):
6602
    """Build hooks nodes.
6603

6604
    """
6605
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6606
    return (nl, nl)
6607

    
6608
  def CheckPrereq(self):
6609
    """Check prerequisites.
6610

6611
    This checks that the instance is in the cluster and is not running.
6612

6613
    """
6614
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6615
                                                self.op.instance_name)
6616
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6617
    assert instance is not None
6618
    _CheckNodeOnline(self, instance.primary_node)
6619
    _CheckInstanceDown(self, instance, "cannot rename")
6620
    self.instance = instance
6621

    
6622
    new_name = self.op.new_name
6623
    if self.op.name_check:
6624
      hostname = netutils.GetHostname(name=new_name)
6625
      if hostname != new_name:
6626
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6627
                     hostname.name)
6628
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6629
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6630
                                    " same as given hostname '%s'") %
6631
                                    (hostname.name, self.op.new_name),
6632
                                    errors.ECODE_INVAL)
6633
      new_name = self.op.new_name = hostname.name
6634
      if (self.op.ip_check and
6635
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6636
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6637
                                   (hostname.ip, new_name),
6638
                                   errors.ECODE_NOTUNIQUE)
6639

    
6640
    instance_list = self.cfg.GetInstanceList()
6641
    if new_name in instance_list and new_name != instance.name:
6642
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6643
                                 new_name, errors.ECODE_EXISTS)
6644

    
6645
  def Exec(self, feedback_fn):
6646
    """Rename the instance.
6647

6648
    """
6649
    inst = self.instance
6650
    old_name = inst.name
6651

    
6652
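    # for file-based disks, remember the current storage directory so it
    # can be renamed on the primary node after the configuration update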
    rename_file_storage = False
6653
    if (inst.disk_template in constants.DTS_FILEBASED and
6654
        self.op.new_name != inst.name):
6655
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6656
      rename_file_storage = True
6657

    
6658
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6659
    # Change the instance lock. This is definitely safe while we hold the BGL.
6660
    # Otherwise the new lock would have to be added in acquired mode.
6661
    assert self.REQ_BGL
6662
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6663
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6664

    
6665
    # re-read the instance from the configuration after rename
6666
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6667

    
6668
    if rename_file_storage:
6669
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6670
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6671
                                                     old_file_storage_dir,
6672
                                                     new_file_storage_dir)
6673
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6674
                   " (but the instance has been renamed in Ganeti)" %
6675
                   (inst.primary_node, old_file_storage_dir,
6676
                    new_file_storage_dir))
6677

    
6678
    _StartInstanceDisks(self, inst, None)
6679
    try:
6680
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6681
                                                 old_name, self.op.debug_level)
6682
      msg = result.fail_msg
6683
      if msg:
6684
        msg = ("Could not run OS rename script for instance %s on node %s"
6685
               " (but the instance has been renamed in Ganeti): %s" %
6686
               (inst.name, inst.primary_node, msg))
6687
        self.proc.LogWarning(msg)
6688
    finally:
6689
      _ShutdownInstanceDisks(self, inst)
6690

    
6691
    return inst.name
6692

    
6693

    
6694
class LUInstanceRemove(LogicalUnit):
6695
  """Remove an instance.
6696

6697
  """
6698
  HPATH = "instance-remove"
6699
  HTYPE = constants.HTYPE_INSTANCE
6700
  REQ_BGL = False
6701

    
6702
  def ExpandNames(self):
6703
    self._ExpandAndLockInstance()
6704
    self.needed_locks[locking.LEVEL_NODE] = []
6705
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6706
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6707

    
6708
  def DeclareLocks(self, level):
6709
    if level == locking.LEVEL_NODE:
6710
      self._LockInstancesNodes()
6711
    elif level == locking.LEVEL_NODE_RES:
6712
      # Copy node locks
6713
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6714
        self.needed_locks[locking.LEVEL_NODE][:]
6715

    
6716
  def BuildHooksEnv(self):
6717
    """Build hooks env.
6718

6719
    This runs on master, primary and secondary nodes of the instance.
6720

6721
    """
6722
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6723
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6724
    return env
6725

    
6726
  def BuildHooksNodes(self):
6727
    """Build hooks nodes.
6728

6729
    """
6730
    nl = [self.cfg.GetMasterNode()]
6731
    nl_post = list(self.instance.all_nodes) + nl
6732
    return (nl, nl_post)
6733

    
6734
  def CheckPrereq(self):
6735
    """Check prerequisites.
6736

6737
    This checks that the instance is in the cluster.
6738

6739
    """
6740
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6741
    assert self.instance is not None, \
6742
      "Cannot retrieve locked instance %s" % self.op.instance_name
6743

    
6744
  def Exec(self, feedback_fn):
6745
    """Remove the instance.
6746

6747
    """
6748
    instance = self.instance
6749
    logging.info("Shutting down instance %s on node %s",
6750
                 instance.name, instance.primary_node)
6751

    
6752
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6753
                                             self.op.shutdown_timeout)
6754
    msg = result.fail_msg
6755
    if msg:
6756
      if self.op.ignore_failures:
6757
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6758
      else:
6759
        raise errors.OpExecError("Could not shutdown instance %s on"
6760
                                 " node %s: %s" %
6761
                                 (instance.name, instance.primary_node, msg))
6762

    
6763
    assert (self.owned_locks(locking.LEVEL_NODE) ==
6764
            self.owned_locks(locking.LEVEL_NODE_RES))
6765
    assert not (set(instance.all_nodes) -
6766
                self.owned_locks(locking.LEVEL_NODE)), \
6767
      "Not owning correct locks"
6768

    
6769
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6770

    
6771

    
6772
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6773
  """Utility function to remove an instance.
6774

6775
  """
6776
  logging.info("Removing block devices for instance %s", instance.name)
6777

    
6778
  if not _RemoveDisks(lu, instance):
6779
    if not ignore_failures:
6780
      raise errors.OpExecError("Can't remove instance's disks")
6781
    feedback_fn("Warning: can't remove instance's disks")
6782

    
6783
  logging.info("Removing instance %s out of cluster config", instance.name)
6784

    
6785
  lu.cfg.RemoveInstance(instance.name)
6786

    
6787
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6788
    "Instance lock removal conflict"
6789

    
6790
  # Remove lock for the instance
6791
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6792

    
6793

    
6794
class LUInstanceQuery(NoHooksLU):
6795
  """Logical unit for querying instances.
6796

6797
  """
6798
  # pylint: disable=W0142
6799
  REQ_BGL = False
6800

    
6801
  def CheckArguments(self):
6802
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6803
                             self.op.output_fields, self.op.use_locking)
6804

    
6805
  def ExpandNames(self):
6806
    self.iq.ExpandNames(self)
6807

    
6808
  def DeclareLocks(self, level):
6809
    self.iq.DeclareLocks(self, level)
6810

    
6811
  def Exec(self, feedback_fn):
6812
    return self.iq.OldStyleQuery(self)
6813

    
6814

    
6815
class LUInstanceFailover(LogicalUnit):
6816
  """Failover an instance.
6817

6818
  """
6819
  HPATH = "instance-failover"
6820
  HTYPE = constants.HTYPE_INSTANCE
6821
  REQ_BGL = False
6822

    
6823
  def CheckArguments(self):
6824
    """Check the arguments.
6825

6826
    """
6827
    self.iallocator = getattr(self.op, "iallocator", None)
6828
    self.target_node = getattr(self.op, "target_node", None)
6829

    
6830
  def ExpandNames(self):
6831
    self._ExpandAndLockInstance()
6832

    
6833
    if self.op.target_node is not None:
6834
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6835

    
6836
    self.needed_locks[locking.LEVEL_NODE] = []
6837
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6838

    
6839
    ignore_consistency = self.op.ignore_consistency
6840
    shutdown_timeout = self.op.shutdown_timeout
6841
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6842
                                       cleanup=False,
6843
                                       failover=True,
6844
                                       ignore_consistency=ignore_consistency,
6845
                                       shutdown_timeout=shutdown_timeout)
6846
    self.tasklets = [self._migrater]
6847

    
6848
  def DeclareLocks(self, level):
6849
    if level == locking.LEVEL_NODE:
6850
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6851
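      # externally mirrored disks can fail over to any node: lock the
      # requested target node if one was given, otherwise all nodes so
      # that the iallocator can pick one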
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6852
        if self.op.target_node is None:
6853
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6854
        else:
6855
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6856
                                                   self.op.target_node]
6857
        del self.recalculate_locks[locking.LEVEL_NODE]
6858
      else:
6859
        self._LockInstancesNodes()
6860

    
6861
  def BuildHooksEnv(self):
6862
    """Build hooks env.
6863

6864
    This runs on master, primary and secondary nodes of the instance.
6865

6866
    """
6867
    instance = self._migrater.instance
6868
    source_node = instance.primary_node
6869
    target_node = self.op.target_node
6870
    env = {
6871
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6872
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6873
      "OLD_PRIMARY": source_node,
6874
      "NEW_PRIMARY": target_node,
6875
      }
6876

    
6877
    if instance.disk_template in constants.DTS_INT_MIRROR:
6878
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6879
      env["NEW_SECONDARY"] = source_node
6880
    else:
6881
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6882

    
6883
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6884

    
6885
    return env
6886

    
6887
  def BuildHooksNodes(self):
6888
    """Build hooks nodes.
6889

6890
    """
6891
    instance = self._migrater.instance
6892
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6893
    return (nl, nl + [instance.primary_node])
6894

    
6895

    
6896
class LUInstanceMigrate(LogicalUnit):
6897
  """Migrate an instance.
6898

6899
  This is migration without shutting down, compared to the failover,
6900
  which is done with shutdown.
6901

6902
  """
6903
  HPATH = "instance-migrate"
6904
  HTYPE = constants.HTYPE_INSTANCE
6905
  REQ_BGL = False
6906

    
6907
  def ExpandNames(self):
6908
    self._ExpandAndLockInstance()
6909

    
6910
    if self.op.target_node is not None:
6911
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6912

    
6913
    self.needed_locks[locking.LEVEL_NODE] = []
6914
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6915

    
6916
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6917
                                       cleanup=self.op.cleanup,
6918
                                       failover=False,
6919
                                       fallback=self.op.allow_failover)
6920
    self.tasklets = [self._migrater]
6921

    
6922
  def DeclareLocks(self, level):
6923
    if level == locking.LEVEL_NODE:
6924
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6925
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6926
        if self.op.target_node is None:
6927
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6928
        else:
6929
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6930
                                                   self.op.target_node]
6931
        del self.recalculate_locks[locking.LEVEL_NODE]
6932
      else:
6933
        self._LockInstancesNodes()
6934

    
6935
  def BuildHooksEnv(self):
6936
    """Build hooks env.
6937

6938
    This runs on master, primary and secondary nodes of the instance.
6939

6940
    """
6941
    instance = self._migrater.instance
6942
    source_node = instance.primary_node
6943
    target_node = self.op.target_node
6944
    env = _BuildInstanceHookEnvByObject(self, instance)
6945
    env.update({
6946
      "MIGRATE_LIVE": self._migrater.live,
6947
      "MIGRATE_CLEANUP": self.op.cleanup,
6948
      "OLD_PRIMARY": source_node,
6949
      "NEW_PRIMARY": target_node,
6950
      })
6951

    
6952
    if instance.disk_template in constants.DTS_INT_MIRROR:
6953
      env["OLD_SECONDARY"] = target_node
6954
      env["NEW_SECONDARY"] = source_node
6955
    else:
6956
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6957

    
6958
    return env
6959

    
6960
  def BuildHooksNodes(self):
6961
    """Build hooks nodes.
6962

6963
    """
6964
    instance = self._migrater.instance
6965
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6966
    return (nl, nl + [instance.primary_node])
6967

    
6968

    
6969
class LUInstanceMove(LogicalUnit):
6970
  """Move an instance by data-copying.
6971

6972
  """
6973
  HPATH = "instance-move"
6974
  HTYPE = constants.HTYPE_INSTANCE
6975
  REQ_BGL = False
6976

    
6977
  def ExpandNames(self):
6978
    self._ExpandAndLockInstance()
6979
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6980
    self.op.target_node = target_node
6981
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6982
    self.needed_locks[locking.LEVEL_NODE_RES] = []
6983
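    # the instance's own (primary) node is appended to the target node
    # lock when the node locks are computed in DeclareLocks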
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6984

    
6985
  def DeclareLocks(self, level):
6986
    if level == locking.LEVEL_NODE:
6987
      self._LockInstancesNodes(primary_only=True)
6988
    elif level == locking.LEVEL_NODE_RES:
6989
      # Copy node locks
6990
      self.needed_locks[locking.LEVEL_NODE_RES] = \
6991
        self.needed_locks[locking.LEVEL_NODE][:]
6992

    
6993
  def BuildHooksEnv(self):
6994
    """Build hooks env.
6995

6996
    This runs on master, primary and secondary nodes of the instance.
6997

6998
    """
6999
    env = {
7000
      "TARGET_NODE": self.op.target_node,
7001
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
7002
      }
7003
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7004
    return env
7005

    
7006
  def BuildHooksNodes(self):
7007
    """Build hooks nodes.
7008

7009
    """
7010
    nl = [
7011
      self.cfg.GetMasterNode(),
7012
      self.instance.primary_node,
7013
      self.op.target_node,
7014
      ]
7015
    return (nl, nl)
7016

    
7017
  def CheckPrereq(self):
7018
    """Check prerequisites.
7019

7020
    This checks that the instance is in the cluster.
7021

7022
    """
7023
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7024
    assert self.instance is not None, \
7025
      "Cannot retrieve locked instance %s" % self.op.instance_name
7026

    
7027
    node = self.cfg.GetNodeInfo(self.op.target_node)
7028
    assert node is not None, \
7029
      "Cannot retrieve locked node %s" % self.op.target_node
7030

    
7031
    self.target_node = target_node = node.name
7032

    
7033
    if target_node == instance.primary_node:
7034
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
7035
                                 (instance.name, target_node),
7036
                                 errors.ECODE_STATE)
7037

    
7038
    bep = self.cfg.GetClusterInfo().FillBE(instance)
7039

    
7040
    for idx, dsk in enumerate(instance.disks):
7041
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
7042
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
7043
                                   " cannot copy" % idx, errors.ECODE_STATE)
7044

    
7045
    _CheckNodeOnline(self, target_node)
7046
    _CheckNodeNotDrained(self, target_node)
7047
    _CheckNodeVmCapable(self, target_node)
7048

    
7049
    if instance.admin_up:
7050
      # check memory requirements on the secondary node
7051
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
7052
                           instance.name, bep[constants.BE_MEMORY],
7053
                           instance.hypervisor)
7054
    else:
7055
      self.LogInfo("Not checking memory on the secondary node as"
7056
                   " instance will not be started")
7057

    
7058
    # check bridge existance
7059
    _CheckInstanceBridgesExist(self, instance, node=target_node)
7060

    
7061
  def Exec(self, feedback_fn):
7062
    """Move an instance.
7063

7064
    The move is done by shutting it down on its present node, copying
7065
    the data over (slow) and starting it on the new node.
7066

7067
    """
7068
    instance = self.instance
7069

    
7070
    source_node = instance.primary_node
7071
    target_node = self.target_node
7072

    
7073
    self.LogInfo("Shutting down instance %s on source node %s",
7074
                 instance.name, source_node)
7075

    
7076
    assert (self.owned_locks(locking.LEVEL_NODE) ==
7077
            self.owned_locks(locking.LEVEL_NODE_RES))
7078

    
7079
    result = self.rpc.call_instance_shutdown(source_node, instance,
7080
                                             self.op.shutdown_timeout)
7081
    msg = result.fail_msg
7082
    if msg:
7083
      if self.op.ignore_consistency:
7084
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
7085
                             " Proceeding anyway. Please make sure node"
7086
                             " %s is down. Error details: %s",
7087
                             instance.name, source_node, source_node, msg)
7088
      else:
7089
        raise errors.OpExecError("Could not shutdown instance %s on"
7090
                                 " node %s: %s" %
7091
                                 (instance.name, source_node, msg))
7092

    
7093
    # create the target disks
7094
    try:
7095
      _CreateDisks(self, instance, target_node=target_node)
7096
    except errors.OpExecError:
7097
      self.LogWarning("Device creation failed, reverting...")
7098
      try:
7099
        _RemoveDisks(self, instance, target_node=target_node)
7100
      finally:
7101
        self.cfg.ReleaseDRBDMinors(instance.name)
7102
        raise
7103

    
7104
    cluster_name = self.cfg.GetClusterInfo().cluster_name
7105

    
7106
    errs = []
7107
    # activate, get path, copy the data over
7108
    for idx, disk in enumerate(instance.disks):
7109
      self.LogInfo("Copying data for disk %d", idx)
7110
      result = self.rpc.call_blockdev_assemble(target_node, disk,
7111
                                               instance.name, True, idx)
7112
      if result.fail_msg:
7113
        self.LogWarning("Can't assemble newly created disk %d: %s",
7114
                        idx, result.fail_msg)
7115
        errs.append(result.fail_msg)
7116
        break
7117
      dev_path = result.payload
7118
      result = self.rpc.call_blockdev_export(source_node, disk,
7119
                                             target_node, dev_path,
7120
                                             cluster_name)
7121
      if result.fail_msg:
7122
        self.LogWarning("Can't copy data over for disk %d: %s",
7123
                        idx, result.fail_msg)
7124
        errs.append(result.fail_msg)
7125
        break
7126

    
7127
    if errs:
7128
      self.LogWarning("Some disks failed to copy, aborting")
7129
      try:
7130
        _RemoveDisks(self, instance, target_node=target_node)
7131
      finally:
7132
        self.cfg.ReleaseDRBDMinors(instance.name)
7133
        raise errors.OpExecError("Errors during disk copy: %s" %
7134
                                 (",".join(errs),))
7135

    
7136
    instance.primary_node = target_node
7137
    self.cfg.Update(instance, feedback_fn)
7138

    
7139
    self.LogInfo("Removing the disks on the original node")
7140
    _RemoveDisks(self, instance, target_node=source_node)
7141

    
7142
    # Only start the instance if it's marked as up
7143
    if instance.admin_up:
7144
      self.LogInfo("Starting instance %s on node %s",
7145
                   instance.name, target_node)
7146

    
7147
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
7148
                                           ignore_secondaries=True)
7149
      if not disks_ok:
7150
        _ShutdownInstanceDisks(self, instance)
7151
        raise errors.OpExecError("Can't activate the instance's disks")
7152

    
7153
      result = self.rpc.call_instance_start(target_node,
7154
                                            (instance, None, None), False)
7155
      msg = result.fail_msg
7156
      if msg:
7157
        _ShutdownInstanceDisks(self, instance)
7158
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7159
                                 (instance.name, target_node, msg))
7160

    
7161

    
7162
class LUNodeMigrate(LogicalUnit):
7163
  """Migrate all instances from a node.
7164

7165
  """
7166
  HPATH = "node-migrate"
7167
  HTYPE = constants.HTYPE_NODE
7168
  REQ_BGL = False
7169

    
7170
  def CheckArguments(self):
7171
    pass
7172

    
7173
  def ExpandNames(self):
7174
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7175

    
7176
    self.share_locks = _ShareAll()
7177
    self.needed_locks = {
7178
      locking.LEVEL_NODE: [self.op.node_name],
7179
      }
7180

    
7181
  def BuildHooksEnv(self):
7182
    """Build hooks env.
7183

7184
    This runs on the master, the primary and all the secondaries.
7185

7186
    """
7187
    return {
7188
      "NODE_NAME": self.op.node_name,
7189
      }
7190

    
7191
  def BuildHooksNodes(self):
7192
    """Build hooks nodes.
7193

7194
    """
7195
    nl = [self.cfg.GetMasterNode()]
7196
    return (nl, nl)
7197

    
7198
  def CheckPrereq(self):
7199
    pass
7200

    
7201
  def Exec(self, feedback_fn):
7202
    # Prepare jobs for migration instances
7203
    jobs = [
7204
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
7205
                                 mode=self.op.mode,
7206
                                 live=self.op.live,
7207
                                 iallocator=self.op.iallocator,
7208
                                 target_node=self.op.target_node)]
7209
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
7210
      ]
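
    # Each entry above is a single-opcode job, so every primary instance on
    # the node gets its own OpInstanceMigrate job and the submitted jobs can
    # be scheduled independently of each other.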

    # TODO: Run iallocator in this opcode and pass correct placement options to
    # OpInstanceMigrate. Since other jobs can modify the cluster between
    # running the iallocator and the actual migration, a good consistency model
    # will have to be found.

    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
            frozenset([self.op.node_name]))

    return ResultWithJobs(jobs)


class TLMigrateInstance(Tasklet):
  """Tasklet class for instance migration.

  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up after a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover, the timeout for the instance
                          shutdown

  """

  # Constants
  _MIGRATION_POLL_INTERVAL = 1      # seconds
  _MIGRATION_FEEDBACK_INTERVAL = 10 # seconds
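
  # With these values the migration status below is polled roughly once per
  # second, while progress feedback is emitted at most every ten seconds
  # (see _ExecMigration).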

  def __init__(self, lu, instance_name, cleanup=False,
               failover=False, fallback=False,
               ignore_consistency=False,
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.cleanup = cleanup
    self.live = False # will be overridden later
    self.failover = failover
    self.fallback = fallback
    self.ignore_consistency = ignore_consistency
    self.shutdown_timeout = shutdown_timeout

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node
      if self.target_node == instance.primary_node:
        raise errors.OpPrereqError("Cannot migrate instance %s"
                                   " to its primary (%s)" %
                                   (instance.name, instance.primary_node))

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the secondary node
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.instance_name,
                     # TODO See why hail breaks with a single node below
                     relocate_from=[self.instance.primary_node,
                                    self.instance.primary_node],
                     )

    ial.Run(self.lu.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.lu.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.lu.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.target_node = ial.result[0]
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.instance_name, self.lu.op.iallocator,
                 utils.CommaJoin(ial.result))

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to clean up after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused; you will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all;"
                               " in this case it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      self._EnsureSecondary(demoted_node)
      try:
        self._WaitUntilSync()
      except errors.OpExecError:
        # we ignore errors here, since if the device is standalone, it
        # won't be able to sync
        pass
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
      return

    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
                         " please try to recover the instance manually;"
                         " error '%s'" % str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                                 instance,
                                                                 migration_info,
                                                                 False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

    abort_result = self.rpc.call_instance_finalize_migration_src(source_node,
        instance, False, self.live)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on source node %s: %s",
                    source_node, abort_msg)

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # Check for hypervisor version mismatch and warn the user.
    nodeinfo = self.rpc.call_node_info([source_node, target_node],
                                       None, self.instance.hypervisor)
    src_info = nodeinfo[source_node]
    dst_info = nodeinfo[target_node]

    if ((constants.HV_NODEINFO_KEY_VERSION in src_info.payload) and
        (constants.HV_NODEINFO_KEY_VERSION in dst_info.payload)):
      src_version = src_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      dst_version = dst_info.payload[constants.HV_NODEINFO_KEY_VERSION]
      if src_version != dst_version:
        self.feedback_fn("* warning: hypervisor version mismatch between"
                         " source (%s) and target (%s) node" %
                         (src_version, dst_version))

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migration" % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      # Then switch the disks to master/master mode
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(True)
      self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* starting memory transfer")
    last_feedback = time.time()
    while True:
      result = self.rpc.call_instance_get_migration_status(source_node,
                                                           instance)
      msg = result.fail_msg
      ms = result.payload   # MigrationStatus instance
      if msg or (ms.status in constants.HV_MIGRATION_FAILED_STATUSES):
        logging.error("Instance migration failed, trying to revert"
                      " disk status: %s", msg)
        self.feedback_fn("Migration failed, aborting")
        self._AbortMigration()
        self._RevertDiskStatus()
        raise errors.OpExecError("Could not migrate instance %s: %s" %
                                 (instance.name, msg))

      if result.payload.status != constants.HV_MIGRATION_ACTIVE:
        self.feedback_fn("* memory transfer complete")
        break

      if (utils.TimeoutExpired(last_feedback,
                               self._MIGRATION_FEEDBACK_INTERVAL) and
          ms.transferred_ram is not None):
        mem_progress = 100 * float(ms.transferred_ram) / float(ms.total_ram)
        self.feedback_fn("* memory transfer progress: %.2f %%" % mem_progress)
        last_feedback = time.time()

      time.sleep(self._MIGRATION_POLL_INTERVAL)

    result = self.rpc.call_instance_finalize_migration_src(source_node,
                                                           instance,
                                                           True,
                                                           self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the source node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    instance.primary_node = target_node

    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_instance_finalize_migration_dst(target_node,
                                                           instance,
                                                           migration_info,
                                                           True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed"
                    " on the target node: %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._EnsureSecondary(source_node)
      self._WaitUntilSync()
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()

    self.feedback_fn("* done")

  def _ExecFailover(self):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)

    source_node = instance.primary_node
    target_node = self.target_node

    if instance.admin_up:
      self.feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
          if primary_node.offline:
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
                             " target node %s" %
                             (primary_node.name, dev.iv_name, target_node))
          elif not self.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover" % dev.iv_name)
    else:
      self.feedback_fn("* not checking disk consistency as instance is not"
                       " running")

    self.feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.ignore_consistency or primary_node.offline:
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
                           " proceeding anyway; please make sure node"
                           " %s is down; error details: %s",
                           instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    self.feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.feedback_fn("* activating the instance's disks on target node %s" %
                       target_node)
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      self.feedback_fn("* starting the instance on the target node %s" %
                       target_node)
      result = self.rpc.call_instance_start(target_node, (instance, None, None),
                                            False)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self.lu, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn
    self.source_node = self.instance.primary_node

    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
      self.target_node = self.instance.secondary_nodes[0]
      # Otherwise self.target_node has been populated either
      # directly, or through an iallocator.

    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))

    if self.failover:
      feedback_fn("Failover instance %s" % self.instance.name)
      self._ExecFailover()
    else:
      feedback_fn("Migrating instance %s" % self.instance.name)

      if self.cleanup:
        return self._ExecCleanup()
      else:
        return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
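
# Note on the recursion above: children are always created before their
# parent, and force_create becomes "sticky" as soon as any device in the
# path reports CreateOnSecondary(), so the whole subtree below such a device
# is materialized on this node.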


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate unique logical volume names, one for each
  extension passed in C{exts}.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
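
# Illustrative note (hypothetical values): for exts like [".disk0", ".disk1"]
# the result is something like ["<uuid-a>.disk0", "<uuid-b>.disk1"], i.e. one
# freshly generated unique ID per requested extension.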


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
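
# The object returned above is a DRBD8 disk whose children are the data LV
# (sized like the disk itself) and a DRBD_META_SIZE metadata LV; its
# logical_id ties together the two nodes, the allocated port, both minors
# and the shared secret.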


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      vg = disk.get(constants.IDISK_VG, vgname)
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(vg, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      data_vg = disk.get(constants.IDISK_VG, vgname)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      names[idx * 2:idx * 2 + 2],
                                      "disk/%d" % disk_index,
                                      minors[idx * 2], minors[idx * 2 + 1])
      disk_dev.mode = disk[constants.IDISK_MODE]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
                              size=disk[constants.IDISK_SIZE],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)
  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
                              size=disk[constants.IDISK_SIZE],
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                          disk[constants.IDISK_ADOPT]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk[constants.IDISK_MODE])
      disks.append(disk_dev)

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
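
# For an instance named e.g. "instance1.example.com" (hypothetical name) the
# text attached to its disks is "originstname+instance1.example.com".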


def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
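
# Worked example (hypothetical numbers): if 512 MiB out of 4096 MiB have been
# written in 8 seconds, the average is 8/512 s per MiB and the estimate is
# (4096 - 512) * 8/512 = 56 seconds remaining.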


def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Resume sync of disk %d failed, please have a"
                      " look at the status and troubleshoot the issue", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)
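
# Example of the chunk sizing above (illustrative values only; the real ones
# live in constants.py): with MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 1024 MiB, a 2048 MiB disk would be wiped in 204 MiB chunks,
# while a 100 GiB disk would be capped at 1024 MiB per call_blockdev_wipe.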


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in constants.DTS_FILEBASED:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + \
        payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, DRBD_META_SIZE),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
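
# Worked example (hypothetical input): two disks of 1024 and 2048 MiB, both in
# volume group "xenvg", yield {"xenvg": 3072} for DT_PLAIN and
# {"xenvg": 3072 + 2 * DRBD_META_SIZE} = {"xenvg": 3328} for DT_DRBD8.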


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8:
      sum(d[constants.IDISK_SIZE] + DRBD_META_SIZE for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
8302

    
8303

    
8304
def _FilterVmNodes(lu, nodenames):
8305
  """Filters out non-vm_capable nodes from a list.
8306

8307
  @type lu: L{LogicalUnit}
8308
  @param lu: the logical unit for which we check
8309
  @type nodenames: list
8310
  @param nodenames: the list of nodes on which we should check
8311
  @rtype: list
8312
  @return: the list of vm-capable nodes
8313

8314
  """
8315
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
8316
  return [name for name in nodenames if name not in vm_nodes]
8317

    
8318

    
8319
def _CheckHVParams(lu, nodenames, hvname, hvparams):
8320
  """Hypervisor parameter validation.
8321

8322
  This function abstract the hypervisor parameter validation to be
8323
  used in both instance create and instance modify.
8324

8325
  @type lu: L{LogicalUnit}
8326
  @param lu: the logical unit for which we check
8327
  @type nodenames: list
8328
  @param nodenames: the list of nodes on which we should check
8329
  @type hvname: string
8330
  @param hvname: the name of the hypervisor we should use
8331
  @type hvparams: dict
8332
  @param hvparams: the parameters which we need to check
8333
  @raise errors.OpPrereqError: if the parameters are not valid
8334

8335
  """
8336
  nodenames = _FilterVmNodes(lu, nodenames)
8337

    
8338
  cluster = lu.cfg.GetClusterInfo()
8339
  hvfull = objects.FillDict(cluster.hvparams.get(hvname, {}), hvparams)
8340

    
8341
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames, hvname, hvfull)
8342
  for node in nodenames:
8343
    info = hvinfo[node]
8344
    if info.offline:
8345
      continue
8346
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8347

    
8348

    
8349
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8350
  """OS parameters validation.
8351

8352
  @type lu: L{LogicalUnit}
8353
  @param lu: the logical unit for which we check
8354
  @type required: boolean
8355
  @param required: whether the validation should fail if the OS is not
8356
      found
8357
  @type nodenames: list
8358
  @param nodenames: the list of nodes on which we should check
8359
  @type osname: string
8360
  @param osname: the name of the hypervisor we should use
8361
  @type osparams: dict
8362
  @param osparams: the parameters which we need to check
8363
  @raise errors.OpPrereqError: if the parameters are not valid
8364

8365
  """
8366
  nodenames = _FilterVmNodes(lu, nodenames)
8367
  result = lu.rpc.call_os_validate(nodenames, required, osname,
8368
                                   [constants.OS_VALIDATE_PARAMETERS],
8369
                                   osparams)
8370
  for node, nres in result.items():
8371
    # we don't check for offline cases since this should be run only
8372
    # against the master node and/or an instance's nodes
8373
    nres.Raise("OS Parameters validation failed on node %s" % node)
8374
    if not nres.payload:
8375
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8376
                 osname, node)
8377

    
8378

    
8379
class LUInstanceCreate(LogicalUnit):
8380
  """Create an instance.
8381

8382
  """
8383
  HPATH = "instance-add"
8384
  HTYPE = constants.HTYPE_INSTANCE
8385
  REQ_BGL = False
8386

    
8387
  def CheckArguments(self):
8388
    """Check arguments.
8389

8390
    """
8391
    # do not require name_check to ease forward/backward compatibility
8392
    # for tools
8393
    if self.op.no_install and self.op.start:
8394
      self.LogInfo("No-installation mode selected, disabling startup")
8395
      self.op.start = False
8396
    # validate/normalize the instance name
8397
    self.op.instance_name = \
8398
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8399

    
8400
    if self.op.ip_check and not self.op.name_check:
8401
      # TODO: make the ip check more flexible and not depend on the name check
8402
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8403
                                 " check", errors.ECODE_INVAL)
8404

    
8405
    # check nics' parameter names
8406
    for nic in self.op.nics:
8407
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8408

    
8409
    # check disks. parameter names and consistent adopt/no-adopt strategy
8410
    has_adopt = has_no_adopt = False
8411
    for disk in self.op.disks:
8412
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8413
      if constants.IDISK_ADOPT in disk:
8414
        has_adopt = True
8415
      else:
8416
        has_no_adopt = True
8417
    if has_adopt and has_no_adopt:
8418
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8419
                                 errors.ECODE_INVAL)
8420
    if has_adopt:
8421
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8422
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8423
                                   " '%s' disk template" %
8424
                                   self.op.disk_template,
8425
                                   errors.ECODE_INVAL)
8426
      if self.op.iallocator is not None:
8427
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8428
                                   " iallocator script", errors.ECODE_INVAL)
8429
      if self.op.mode == constants.INSTANCE_IMPORT:
8430
        raise errors.OpPrereqError("Disk adoption not allowed for"
8431
                                   " instance import", errors.ECODE_INVAL)
8432
    else:
8433
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8434
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8435
                                   " but no 'adopt' parameter given" %
8436
                                   self.op.disk_template,
8437
                                   errors.ECODE_INVAL)
8438

    
8439
    self.adopt_disks = has_adopt
8440

    
8441
    # instance name verification
8442
    if self.op.name_check:
8443
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8444
      self.op.instance_name = self.hostname1.name
8445
      # used in CheckPrereq for ip ping check
8446
      self.check_ip = self.hostname1.ip
8447
    else:
8448
      self.check_ip = None
8449

    
8450
    # file storage checks
8451
    if (self.op.file_driver and
8452
        not self.op.file_driver in constants.FILE_DRIVER):
8453
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8454
                                 self.op.file_driver, errors.ECODE_INVAL)
8455

    
8456
    if self.op.disk_template == constants.DT_FILE:
8457
      opcodes.RequireFileStorage()
8458
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8459
      opcodes.RequireSharedFileStorage()
8460

    
8461
    ### Node/iallocator related checks
8462
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8463

    
8464
    if self.op.pnode is not None:
8465
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8466
        if self.op.snode is None:
8467
          raise errors.OpPrereqError("The networked disk templates need"
8468
                                     " a mirror node", errors.ECODE_INVAL)
8469
      elif self.op.snode:
8470
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8471
                        " template")
8472
        self.op.snode = None
8473

    
8474
    self._cds = _GetClusterDomainSecret()
8475

    
8476
    if self.op.mode == constants.INSTANCE_IMPORT:
8477
      # On import force_variant must be True, because if we forced it at
8478
      # initial install, our only chance when importing it back is that it
8479
      # works again!
8480
      self.op.force_variant = True
8481

    
8482
      if self.op.no_install:
8483
        self.LogInfo("No-installation mode has no effect during import")
8484

    
8485
    elif self.op.mode == constants.INSTANCE_CREATE:
8486
      if self.op.os_type is None:
8487
        raise errors.OpPrereqError("No guest OS specified",
8488
                                   errors.ECODE_INVAL)
8489
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8490
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8491
                                   " installation" % self.op.os_type,
8492
                                   errors.ECODE_STATE)
8493
      if self.op.disk_template is None:
8494
        raise errors.OpPrereqError("No disk template specified",
8495
                                   errors.ECODE_INVAL)
8496

    
8497
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8498
      # Check handshake to ensure both clusters have the same domain secret
8499
      src_handshake = self.op.source_handshake
8500
      if not src_handshake:
8501
        raise errors.OpPrereqError("Missing source handshake",
8502
                                   errors.ECODE_INVAL)
8503

    
8504
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8505
                                                           src_handshake)
8506
      if errmsg:
8507
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8508
                                   errors.ECODE_INVAL)
8509

    
8510
      # Load and check source CA
8511
      self.source_x509_ca_pem = self.op.source_x509_ca
8512
      if not self.source_x509_ca_pem:
8513
        raise errors.OpPrereqError("Missing source X509 CA",
8514
                                   errors.ECODE_INVAL)
8515

    
8516
      try:
8517
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8518
                                                    self._cds)
8519
      except OpenSSL.crypto.Error, err:
8520
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8521
                                   (err, ), errors.ECODE_INVAL)
8522

    
8523
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8524
      if errcode is not None:
8525
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8526
                                   errors.ECODE_INVAL)
8527

    
8528
      self.source_x509_ca = cert
8529

    
8530
      src_instance_name = self.op.source_instance_name
8531
      if not src_instance_name:
8532
        raise errors.OpPrereqError("Missing source instance name",
8533
                                   errors.ECODE_INVAL)
8534

    
8535
      self.source_instance_name = \
8536
          netutils.GetHostname(name=src_instance_name).name
8537

    
8538
    else:
8539
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8540
                                 self.op.mode, errors.ECODE_INVAL)
8541

    
8542
  def ExpandNames(self):
8543
    """ExpandNames for CreateInstance.
8544

8545
    Figure out the right locks for instance creation.
8546

8547
    """
8548
    self.needed_locks = {}
8549

    
8550
    instance_name = self.op.instance_name
8551
    # this is just a preventive check, but someone might still add this
8552
    # instance in the meantime, and creation will fail at lock-add time
8553
    if instance_name in self.cfg.GetInstanceList():
8554
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8555
                                 instance_name, errors.ECODE_EXISTS)
8556

    
8557
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8558

    
8559
    if self.op.iallocator:
8560
      # TODO: Find a solution to not lock all nodes in the cluster, e.g. by
8561
      # specifying a group on instance creation and then selecting nodes from
8562
      # that group
8563
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8564
      self.needed_locks[locking.LEVEL_NODE_RES] = locking.ALL_SET
8565
    else:
8566
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8567
      nodelist = [self.op.pnode]
8568
      if self.op.snode is not None:
8569
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8570
        nodelist.append(self.op.snode)
8571
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8572
      # Lock resources of instance's primary and secondary nodes (copy to
8573
      # prevent accidential modification)
8574
      self.needed_locks[locking.LEVEL_NODE_RES] = list(nodelist)
8575

    
8576
    # in case of import lock the source node too
8577
    if self.op.mode == constants.INSTANCE_IMPORT:
8578
      src_node = self.op.src_node
8579
      src_path = self.op.src_path
8580

    
8581
      if src_path is None:
8582
        self.op.src_path = src_path = self.op.instance_name
8583

    
8584
      if src_node is None:
8585
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8586
        self.op.src_node = None
8587
        if os.path.isabs(src_path):
8588
          raise errors.OpPrereqError("Importing an instance from a path"
8589
                                     " requires a source node option",
8590
                                     errors.ECODE_INVAL)
8591
      else:
8592
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8593
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8594
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8595
        if not os.path.isabs(src_path):
8596
          self.op.src_path = src_path = \
8597
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8598

    
8599
  def _RunAllocator(self):
8600
    """Run the allocator based on input opcode.
8601

8602
    """
8603
    nics = [n.ToDict() for n in self.nics]
8604
    ial = IAllocator(self.cfg, self.rpc,
8605
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8606
                     name=self.op.instance_name,
8607
                     disk_template=self.op.disk_template,
8608
                     tags=self.op.tags,
8609
                     os=self.op.os_type,
8610
                     vcpus=self.be_full[constants.BE_VCPUS],
8611
                     memory=self.be_full[constants.BE_MEMORY],
8612
                     disks=self.disks,
8613
                     nics=nics,
8614
                     hypervisor=self.op.hypervisor,
8615
                     )
8616

    
8617
    ial.Run(self.op.iallocator)
8618

    
8619
    if not ial.success:
8620
      raise errors.OpPrereqError("Can't compute nodes using"
8621
                                 " iallocator '%s': %s" %
8622
                                 (self.op.iallocator, ial.info),
8623
                                 errors.ECODE_NORES)
8624
    if len(ial.result) != ial.required_nodes:
8625
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8626
                                 " of nodes (%s), required %s" %
8627
                                 (self.op.iallocator, len(ial.result),
8628
                                  ial.required_nodes), errors.ECODE_FAULT)
8629
    self.op.pnode = ial.result[0]
8630
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8631
                 self.op.instance_name, self.op.iallocator,
8632
                 utils.CommaJoin(ial.result))
8633
    if ial.required_nodes == 2:
8634
      self.op.snode = ial.result[1]
8635

    
8636
  def BuildHooksEnv(self):
8637
    """Build hooks env.
8638

8639
    This runs on master, primary and secondary nodes of the instance.
8640

8641
    """
8642
    env = {
8643
      "ADD_MODE": self.op.mode,
8644
      }
8645
    if self.op.mode == constants.INSTANCE_IMPORT:
8646
      env["SRC_NODE"] = self.op.src_node
8647
      env["SRC_PATH"] = self.op.src_path
8648
      env["SRC_IMAGES"] = self.src_images
8649

    
8650
    env.update(_BuildInstanceHookEnv(
8651
      name=self.op.instance_name,
8652
      primary_node=self.op.pnode,
8653
      secondary_nodes=self.secondaries,
8654
      status=self.op.start,
8655
      os_type=self.op.os_type,
8656
      memory=self.be_full[constants.BE_MEMORY],
8657
      vcpus=self.be_full[constants.BE_VCPUS],
8658
      nics=_NICListToTuple(self, self.nics),
8659
      disk_template=self.op.disk_template,
8660
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8661
             for d in self.disks],
8662
      bep=self.be_full,
8663
      hvp=self.hv_full,
8664
      hypervisor_name=self.op.hypervisor,
8665
      tags=self.op.tags,
8666
    ))
8667

    
8668
    return env
8669

    
8670
  def BuildHooksNodes(self):
8671
    """Build hooks nodes.
8672

8673
    """
8674
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8675
    return nl, nl
8676

    
8677
  def _ReadExportInfo(self):
8678
    """Reads the export information from disk.
8679

8680
    It will override the opcode source node and path with the actual
8681
    information, if these two were not specified before.
8682

8683
    @return: the export information
8684

8685
    """
8686
    assert self.op.mode == constants.INSTANCE_IMPORT
8687

    
8688
    src_node = self.op.src_node
8689
    src_path = self.op.src_path
8690

    
8691
    if src_node is None:
8692
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8693
      exp_list = self.rpc.call_export_list(locked_nodes)
8694
      found = False
8695
      for node in exp_list:
8696
        if exp_list[node].fail_msg:
8697
          continue
8698
        if src_path in exp_list[node].payload:
8699
          found = True
8700
          self.op.src_node = src_node = node
8701
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8702
                                                       src_path)
8703
          break
8704
      if not found:
8705
        raise errors.OpPrereqError("No export found for relative path %s" %
8706
                                    src_path, errors.ECODE_INVAL)
8707

    
8708
    _CheckNodeOnline(self, src_node)
8709
    result = self.rpc.call_export_info(src_node, src_path)
8710
    result.Raise("No export or invalid export found in dir %s" % src_path)
8711

    
8712
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8713
    if not export_info.has_section(constants.INISECT_EXP):
8714
      raise errors.ProgrammerError("Corrupted export config",
8715
                                   errors.ECODE_ENVIRON)
8716

    
8717
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8718
    if (int(ei_version) != constants.EXPORT_VERSION):
8719
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8720
                                 (ei_version, constants.EXPORT_VERSION),
8721
                                 errors.ECODE_ENVIRON)
8722
    return export_info
8723

    
8724
  def _ReadExportParams(self, einfo):
8725
    """Use export parameters as defaults.
8726

8727
    In case the opcode doesn't specify (as in override) some instance
8728
    parameters, then try to use them from the export information, if
8729
    that declares them.
8730

8731
    """
8732
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8733

    
8734
    if self.op.disk_template is None:
8735
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8736
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8737
                                          "disk_template")
8738
        if self.op.disk_template not in constants.DISK_TEMPLATES:
8739
          raise errors.OpPrereqError("Disk template specified in configuration"
8740
                                     " file is not one of the allowed values:"
8741
                                     " %s" % " ".join(constants.DISK_TEMPLATES))
8742
      else:
8743
        raise errors.OpPrereqError("No disk template specified and the export"
8744
                                   " is missing the disk_template information",
8745
                                   errors.ECODE_INVAL)
8746

    
8747
    if not self.op.disks:
8748
      disks = []
8749
      # TODO: import the disk iv_name too
8750
      for idx in range(constants.MAX_DISKS):
8751
        if einfo.has_option(constants.INISECT_INS, "disk%d_size" % idx):
8752
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8753
          disks.append({constants.IDISK_SIZE: disk_sz})
8754
      self.op.disks = disks
8755
      if not disks and self.op.disk_template != constants.DT_DISKLESS:
8756
        raise errors.OpPrereqError("No disk info specified and the export"
8757
                                   " is missing the disk information",
8758
                                   errors.ECODE_INVAL)
8759

    
8760
    if not self.op.nics:
8761
      nics = []
8762
      for idx in range(constants.MAX_NICS):
8763
        if einfo.has_option(constants.INISECT_INS, "nic%d_mac" % idx):
8764
          ndict = {}
8765
          for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8766
            v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8767
            ndict[name] = v
8768
          nics.append(ndict)
8769
        else:
8770
          break
8771
      self.op.nics = nics
8772

    
8773
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8774
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8775

    
8776
    if (self.op.hypervisor is None and
8777
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8778
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8779

    
8780
    if einfo.has_section(constants.INISECT_HYP):
8781
      # use the export parameters but do not override the ones
8782
      # specified by the user
8783
      for name, value in einfo.items(constants.INISECT_HYP):
8784
        if name not in self.op.hvparams:
8785
          self.op.hvparams[name] = value
8786

    
8787
    if einfo.has_section(constants.INISECT_BEP):
8788
      # use the parameters, without overriding
8789
      for name, value in einfo.items(constants.INISECT_BEP):
8790
        if name not in self.op.beparams:
8791
          self.op.beparams[name] = value
8792
    else:
8793
      # try to read the parameters old style, from the main section
8794
      for name in constants.BES_PARAMETERS:
8795
        if (name not in self.op.beparams and
8796
            einfo.has_option(constants.INISECT_INS, name)):
8797
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8798

    
8799
    if einfo.has_section(constants.INISECT_OSP):
8800
      # use the parameters, without overriding
8801
      for name, value in einfo.items(constants.INISECT_OSP):
8802
        if name not in self.op.osparams:
8803
          self.op.osparams[name] = value
8804

    
8805
  def _RevertToDefaults(self, cluster):
8806
    """Revert the instance parameters to the default values.
8807

8808
    """
8809
    # hvparams
8810
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8811
    for name in self.op.hvparams.keys():
8812
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8813
        del self.op.hvparams[name]
8814
    # beparams
8815
    be_defs = cluster.SimpleFillBE({})
8816
    for name in self.op.beparams.keys():
8817
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
8818
        del self.op.beparams[name]
8819
    # nic params
8820
    nic_defs = cluster.SimpleFillNIC({})
8821
    for nic in self.op.nics:
8822
      for name in constants.NICS_PARAMETERS:
8823
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8824
          del nic[name]
8825
    # osparams
8826
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8827
    for name in self.op.osparams.keys():
8828
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
8829
        del self.op.osparams[name]
8830

    
8831
  def _CalculateFileStorageDir(self):
8832
    """Calculate final instance file storage dir.
8833

8834
    """
8835
    # file storage dir calculation/check
8836
    self.instance_file_storage_dir = None
8837
    if self.op.disk_template in constants.DTS_FILEBASED:
8838
      # build the full file storage dir path
8839
      joinargs = []
8840

    
8841
      if self.op.disk_template == constants.DT_SHARED_FILE:
8842
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8843
      else:
8844
        get_fsd_fn = self.cfg.GetFileStorageDir
8845

    
8846
      cfg_storagedir = get_fsd_fn()
8847
      if not cfg_storagedir:
8848
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8849
      joinargs.append(cfg_storagedir)
8850

    
8851
      if self.op.file_storage_dir is not None:
8852
        joinargs.append(self.op.file_storage_dir)
8853

    
8854
      joinargs.append(self.op.instance_name)
8855

    
8856
      # pylint: disable=W0142
8857
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8858

    
8859
  def CheckPrereq(self):
8860
    """Check prerequisites.
8861

8862
    """
8863
    self._CalculateFileStorageDir()
8864

    
8865
    if self.op.mode == constants.INSTANCE_IMPORT:
8866
      export_info = self._ReadExportInfo()
8867
      self._ReadExportParams(export_info)
8868

    
8869
    if (not self.cfg.GetVGName() and
8870
        self.op.disk_template not in constants.DTS_NOT_LVM):
8871
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8872
                                 " instances", errors.ECODE_STATE)
8873

    
8874
    if (self.op.hypervisor is None or
8875
        self.op.hypervisor == constants.VALUE_AUTO):
8876
      self.op.hypervisor = self.cfg.GetHypervisorType()
8877

    
8878
    cluster = self.cfg.GetClusterInfo()
8879
    enabled_hvs = cluster.enabled_hypervisors
8880
    if self.op.hypervisor not in enabled_hvs:
8881
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8882
                                 " cluster (%s)" % (self.op.hypervisor,
8883
                                  ",".join(enabled_hvs)),
8884
                                 errors.ECODE_STATE)
8885

    
8886
    # Check tag validity
8887
    for tag in self.op.tags:
8888
      objects.TaggableObject.ValidateTag(tag)
8889

    
8890
    # check hypervisor parameter syntax (locally)
8891
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8892
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8893
                                      self.op.hvparams)
8894
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8895
    hv_type.CheckParameterSyntax(filled_hvp)
8896
    self.hv_full = filled_hvp
8897
    # check that we don't specify global parameters on an instance
8898
    _CheckGlobalHvParams(self.op.hvparams)
8899

    
8900
    # fill and remember the beparams dict
8901
    default_beparams = cluster.beparams[constants.PP_DEFAULT]
8902
    for param, value in self.op.beparams.iteritems():
8903
      if value == constants.VALUE_AUTO:
8904
        self.op.beparams[param] = default_beparams[param]
8905
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8906
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8907

    
8908
    # build os parameters
8909
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8910

    
8911
    # now that hvp/bep are in final format, let's reset to defaults,
8912
    # if told to do so
8913
    if self.op.identify_defaults:
8914
      self._RevertToDefaults(cluster)
8915

    
8916
    # NIC buildup
8917
    self.nics = []
8918
    for idx, nic in enumerate(self.op.nics):
8919
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8920
      nic_mode = nic_mode_req
8921
      if nic_mode is None or nic_mode == constants.VALUE_AUTO:
8922
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8923

    
8924
      # in routed mode, for the first nic, the default ip is 'auto'
8925
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8926
        default_ip_mode = constants.VALUE_AUTO
8927
      else:
8928
        default_ip_mode = constants.VALUE_NONE
8929

    
8930
      # ip validity checks
8931
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8932
      if ip is None or ip.lower() == constants.VALUE_NONE:
8933
        nic_ip = None
8934
      elif ip.lower() == constants.VALUE_AUTO:
8935
        if not self.op.name_check:
8936
          raise errors.OpPrereqError("IP address set to auto but name checks"
8937
                                     " have been skipped",
8938
                                     errors.ECODE_INVAL)
8939
        nic_ip = self.hostname1.ip
8940
      else:
8941
        if not netutils.IPAddress.IsValid(ip):
8942
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8943
                                     errors.ECODE_INVAL)
8944
        nic_ip = ip
8945

    
8946
      # TODO: check the ip address for uniqueness
8947
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8948
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8949
                                   errors.ECODE_INVAL)
8950

    
8951
      # MAC address verification
8952
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8953
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8954
        mac = utils.NormalizeAndValidateMac(mac)
8955

    
8956
        try:
8957
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8958
        except errors.ReservationError:
8959
          raise errors.OpPrereqError("MAC address %s already in use"
8960
                                     " in cluster" % mac,
8961
                                     errors.ECODE_NOTUNIQUE)
8962

    
8963
      #  Build nic parameters
8964
      link = nic.get(constants.INIC_LINK, None)
8965
      if link == constants.VALUE_AUTO:
8966
        link = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_LINK]
8967
      nicparams = {}
8968
      if nic_mode_req:
8969
        nicparams[constants.NIC_MODE] = nic_mode
8970
      if link:
8971
        nicparams[constants.NIC_LINK] = link
8972

    
8973
      check_params = cluster.SimpleFillNIC(nicparams)
8974
      objects.NIC.CheckParameterSyntax(check_params)
8975
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8976

    
8977
    # disk checks/pre-build
8978
    default_vg = self.cfg.GetVGName()
8979
    self.disks = []
8980
    for disk in self.op.disks:
8981
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8982
      if mode not in constants.DISK_ACCESS_SET:
8983
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8984
                                   mode, errors.ECODE_INVAL)
8985
      size = disk.get(constants.IDISK_SIZE, None)
8986
      if size is None:
8987
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8988
      try:
8989
        size = int(size)
8990
      except (TypeError, ValueError):
8991
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8992
                                   errors.ECODE_INVAL)
8993

    
8994
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8995
      new_disk = {
8996
        constants.IDISK_SIZE: size,
8997
        constants.IDISK_MODE: mode,
8998
        constants.IDISK_VG: data_vg,
8999
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
9000
        }
9001
      if constants.IDISK_ADOPT in disk:
9002
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
9003
      self.disks.append(new_disk)
9004

    
9005
    if self.op.mode == constants.INSTANCE_IMPORT:
9006
      disk_images = []
9007
      for idx in range(len(self.disks)):
9008
        option = "disk%d_dump" % idx
9009
        if export_info.has_option(constants.INISECT_INS, option):
9010
          # FIXME: are the old os-es, disk sizes, etc. useful?
9011
          export_name = export_info.get(constants.INISECT_INS, option)
9012
          image = utils.PathJoin(self.op.src_path, export_name)
9013
          disk_images.append(image)
9014
        else:
9015
          disk_images.append(False)
9016

    
9017
      self.src_images = disk_images
9018

    
9019
      old_name = export_info.get(constants.INISECT_INS, "name")
9020
      if self.op.instance_name == old_name:
9021
        for idx, nic in enumerate(self.nics):
9022
          if nic.mac == constants.VALUE_AUTO:
9023
            nic_mac_ini = "nic%d_mac" % idx
9024
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
9025

    
9026
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
9027

    
9028
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
9029
    if self.op.ip_check:
9030
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
9031
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
9032
                                   (self.check_ip, self.op.instance_name),
9033
                                   errors.ECODE_NOTUNIQUE)
9034

    
9035
    #### mac address generation
9036
    # By generating here the mac address both the allocator and the hooks get
9037
    # the real final mac address rather than the 'auto' or 'generate' value.
9038
    # There is a race condition between the generation and the instance object
9039
    # creation, which means that we know the mac is valid now, but we're not
9040
    # sure it will be when we actually add the instance. If things go bad
9041
    # adding the instance will abort because of a duplicate mac, and the
9042
    # creation job will fail.
9043
    for nic in self.nics:
9044
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9045
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
9046

    
9047
    #### allocator run
9048

    
9049
    if self.op.iallocator is not None:
9050
      self._RunAllocator()
9051

    
9052
    #### node related checks
9053

    
9054
    # check primary node
9055
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
9056
    assert self.pnode is not None, \
9057
      "Cannot retrieve locked node %s" % self.op.pnode
9058
    if pnode.offline:
9059
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
9060
                                 pnode.name, errors.ECODE_STATE)
9061
    if pnode.drained:
9062
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
9063
                                 pnode.name, errors.ECODE_STATE)
9064
    if not pnode.vm_capable:
9065
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
9066
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
9067

    
9068
    self.secondaries = []
9069

    
9070
    # mirror node verification
9071
    if self.op.disk_template in constants.DTS_INT_MIRROR:
9072
      if self.op.snode == pnode.name:
9073
        raise errors.OpPrereqError("The secondary node cannot be the"
9074
                                   " primary node", errors.ECODE_INVAL)
9075
      _CheckNodeOnline(self, self.op.snode)
9076
      _CheckNodeNotDrained(self, self.op.snode)
9077
      _CheckNodeVmCapable(self, self.op.snode)
9078
      self.secondaries.append(self.op.snode)
9079

    
9080
    nodenames = [pnode.name] + self.secondaries
9081

    
9082
    if not self.adopt_disks:
9083
      # Check lv size requirements, if not adopting
9084
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
9085
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
9086

    
9087
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
9088
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
9089
                                disk[constants.IDISK_ADOPT])
9090
                     for disk in self.disks])
9091
      if len(all_lvs) != len(self.disks):
9092
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
9093
                                   errors.ECODE_INVAL)
9094
      for lv_name in all_lvs:
9095
        try:
9096
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
9097
          # to ReserveLV uses the same syntax
9098
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
9099
        except errors.ReservationError:
9100
          raise errors.OpPrereqError("LV named %s used by another instance" %
9101
                                     lv_name, errors.ECODE_NOTUNIQUE)
9102

    
9103
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
9104
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
9105

    
9106
      node_lvs = self.rpc.call_lv_list([pnode.name],
9107
                                       vg_names.payload.keys())[pnode.name]
9108
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
9109
      node_lvs = node_lvs.payload
9110

    
9111
      delta = all_lvs.difference(node_lvs.keys())
9112
      if delta:
9113
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
9114
                                   utils.CommaJoin(delta),
9115
                                   errors.ECODE_INVAL)
9116
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
9117
      if online_lvs:
9118
        raise errors.OpPrereqError("Online logical volumes found, cannot"
9119
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
9120
                                   errors.ECODE_STATE)
9121
      # update the size of disk based on what is found
9122
      for dsk in self.disks:
9123
        dsk[constants.IDISK_SIZE] = \
9124
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
9125
                                        dsk[constants.IDISK_ADOPT])][0]))
9126

    
9127
    elif self.op.disk_template == constants.DT_BLOCK:
9128
      # Normalize and de-duplicate device paths
9129
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
9130
                       for disk in self.disks])
9131
      if len(all_disks) != len(self.disks):
9132
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
9133
                                   errors.ECODE_INVAL)
9134
      baddisks = [d for d in all_disks
9135
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
9136
      if baddisks:
9137
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
9138
                                   " cannot be adopted" %
9139
                                   (", ".join(baddisks),
9140
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
9141
                                   errors.ECODE_INVAL)
9142

    
9143
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
9144
                                            list(all_disks))[pnode.name]
9145
      node_disks.Raise("Cannot get block device information from node %s" %
9146
                       pnode.name)
9147
      node_disks = node_disks.payload
9148
      delta = all_disks.difference(node_disks.keys())
9149
      if delta:
9150
        raise errors.OpPrereqError("Missing block device(s): %s" %
9151
                                   utils.CommaJoin(delta),
9152
                                   errors.ECODE_INVAL)
9153
      for dsk in self.disks:
9154
        dsk[constants.IDISK_SIZE] = \
9155
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
9156

    
9157
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
9158

    
9159
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
9160
    # check OS parameters (remotely)
9161
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
9162

    
9163
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
9164

    
9165
    # memory check on primary node
9166
    if self.op.start:
9167
      _CheckNodeFreeMemory(self, self.pnode.name,
9168
                           "creating instance %s" % self.op.instance_name,
9169
                           self.be_full[constants.BE_MEMORY],
9170
                           self.op.hypervisor)
9171

    
9172
    self.dry_run_result = list(nodenames)
9173

    
9174
  def Exec(self, feedback_fn):
9175
    """Create and add the instance to the cluster.
9176

9177
    """
9178
    instance = self.op.instance_name
9179
    pnode_name = self.pnode.name
9180

    
9181
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) -
9182
                self.owned_locks(locking.LEVEL_NODE)), \
9183
      "Node locks differ from node resource locks"
9184

    
9185
    ht_kind = self.op.hypervisor
9186
    if ht_kind in constants.HTS_REQ_PORT:
9187
      network_port = self.cfg.AllocatePort()
9188
    else:
9189
      network_port = None
9190

    
9191
    disks = _GenerateDiskTemplate(self,
9192
                                  self.op.disk_template,
9193
                                  instance, pnode_name,
9194
                                  self.secondaries,
9195
                                  self.disks,
9196
                                  self.instance_file_storage_dir,
9197
                                  self.op.file_driver,
9198
                                  0,
9199
                                  feedback_fn)
9200

    
9201
    iobj = objects.Instance(name=instance, os=self.op.os_type,
9202
                            primary_node=pnode_name,
9203
                            nics=self.nics, disks=disks,
9204
                            disk_template=self.op.disk_template,
9205
                            admin_up=False,
9206
                            network_port=network_port,
9207
                            beparams=self.op.beparams,
9208
                            hvparams=self.op.hvparams,
9209
                            hypervisor=self.op.hypervisor,
9210
                            osparams=self.op.osparams,
9211
                            )
9212

    
9213
    if self.op.tags:
9214
      for tag in self.op.tags:
9215
        iobj.AddTag(tag)
9216

    
9217
    if self.adopt_disks:
9218
      if self.op.disk_template == constants.DT_PLAIN:
9219
        # rename LVs to the newly-generated names; we need to construct
9220
        # 'fake' LV disks with the old data, plus the new unique_id
9221
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
9222
        rename_to = []
9223
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
9224
          rename_to.append(t_dsk.logical_id)
9225
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
9226
          self.cfg.SetDiskID(t_dsk, pnode_name)
9227
        result = self.rpc.call_blockdev_rename(pnode_name,
9228
                                               zip(tmp_disks, rename_to))
9229
        result.Raise("Failed to rename adoped LVs")
9230
    else:
9231
      feedback_fn("* creating instance disks...")
9232
      try:
9233
        _CreateDisks(self, iobj)
9234
      except errors.OpExecError:
9235
        self.LogWarning("Device creation failed, reverting...")
9236
        try:
9237
          _RemoveDisks(self, iobj)
9238
        finally:
9239
          self.cfg.ReleaseDRBDMinors(instance)
9240
          raise
9241

    
9242
    feedback_fn("adding instance %s to cluster config" % instance)
9243

    
9244
    self.cfg.AddInstance(iobj, self.proc.GetECId())
9245

    
9246
    # Declare that we don't want to remove the instance lock anymore, as we've
9247
    # added the instance to the config
9248
    del self.remove_locks[locking.LEVEL_INSTANCE]
9249

    
9250
    if self.op.mode == constants.INSTANCE_IMPORT:
9251
      # Release unused nodes
9252
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
9253
    else:
9254
      # Release all nodes
9255
      _ReleaseLocks(self, locking.LEVEL_NODE)
9256

    
9257
    disk_abort = False
9258
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
9259
      feedback_fn("* wiping instance disks...")
9260
      try:
9261
        _WipeDisks(self, iobj)
9262
      except errors.OpExecError, err:
9263
        logging.exception("Wiping disks failed")
9264
        self.LogWarning("Wiping instance disks failed (%s)", err)
9265
        disk_abort = True
9266

    
9267
    if disk_abort:
9268
      # Something is already wrong with the disks, don't do anything else
9269
      pass
9270
    elif self.op.wait_for_sync:
9271
      disk_abort = not _WaitForSync(self, iobj)
9272
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
9273
      # make sure the disks are not degraded (still sync-ing is ok)
9274
      feedback_fn("* checking mirrors status")
9275
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
9276
    else:
9277
      disk_abort = False
9278

    
9279
    if disk_abort:
9280
      _RemoveDisks(self, iobj)
9281
      self.cfg.RemoveInstance(iobj.name)
9282
      # Make sure the instance lock gets removed
9283
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
9284
      raise errors.OpExecError("There are some degraded disks for"
9285
                               " this instance")
9286

    
9287
    # Release all node resource locks
9288
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
9289

    
9290
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
9291
      if self.op.mode == constants.INSTANCE_CREATE:
9292
        if not self.op.no_install:
9293
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
9294
                        not self.op.wait_for_sync)
9295
          if pause_sync:
9296
            feedback_fn("* pausing disk sync to install instance OS")
9297
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9298
                                                              iobj.disks, True)
9299
            for idx, success in enumerate(result.payload):
9300
              if not success:
9301
                logging.warn("pause-sync of instance %s for disk %d failed",
9302
                             instance, idx)
9303

    
9304
          feedback_fn("* running the instance OS create scripts...")
9305
          # FIXME: pass debug option from opcode to backend
9306
          os_add_result = \
9307
            self.rpc.call_instance_os_add(pnode_name, (iobj, None), False,
9308
                                          self.op.debug_level)
9309
          if pause_sync:
9310
            feedback_fn("* resuming disk sync")
9311
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
9312
                                                              iobj.disks, False)
9313
            for idx, success in enumerate(result.payload):
9314
              if not success:
9315
                logging.warn("resume-sync of instance %s for disk %d failed",
9316
                             instance, idx)
9317

    
9318
          os_add_result.Raise("Could not add os for instance %s"
9319
                              " on node %s" % (instance, pnode_name))
9320

    
9321
      elif self.op.mode == constants.INSTANCE_IMPORT:
9322
        feedback_fn("* running the instance OS import scripts...")
9323

    
9324
        transfers = []
9325

    
9326
        for idx, image in enumerate(self.src_images):
9327
          if not image:
9328
            continue
9329

    
9330
          # FIXME: pass debug option from opcode to backend
9331
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
9332
                                             constants.IEIO_FILE, (image, ),
9333
                                             constants.IEIO_SCRIPT,
9334
                                             (iobj.disks[idx], idx),
9335
                                             None)
9336
          transfers.append(dt)
9337

    
9338
        import_result = \
9339
          masterd.instance.TransferInstanceData(self, feedback_fn,
9340
                                                self.op.src_node, pnode_name,
9341
                                                self.pnode.secondary_ip,
9342
                                                iobj, transfers)
9343
        if not compat.all(import_result):
9344
          self.LogWarning("Some disks for instance %s on node %s were not"
9345
                          " imported successfully" % (instance, pnode_name))
9346

    
9347
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9348
        feedback_fn("* preparing remote import...")
9349
        # The source cluster will stop the instance before attempting to make a
9350
        # connection. In some cases stopping an instance can take a long time,
9351
        # hence the shutdown timeout is added to the connection timeout.
9352
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9353
                           self.op.source_shutdown_timeout)
9354
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9355

    
9356
        assert iobj.primary_node == self.pnode.name
9357
        disk_results = \
9358
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9359
                                        self.source_x509_ca,
9360
                                        self._cds, timeouts)
9361
        if not compat.all(disk_results):
9362
          # TODO: Should the instance still be started, even if some disks
9363
          # failed to import (valid for local imports, too)?
9364
          self.LogWarning("Some disks for instance %s on node %s were not"
9365
                          " imported successfully" % (instance, pnode_name))
9366

    
9367
        # Run rename script on newly imported instance
9368
        assert iobj.name == instance
9369
        feedback_fn("Running rename script for %s" % instance)
9370
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9371
                                                   self.source_instance_name,
9372
                                                   self.op.debug_level)
9373
        if result.fail_msg:
9374
          self.LogWarning("Failed to run rename script for %s on node"
9375
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9376

    
9377
      else:
9378
        # also checked in the prereq part
9379
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9380
                                     % self.op.mode)
9381

    
9382
    assert not self.owned_locks(locking.LEVEL_NODE_RES)
9383

    
9384
    if self.op.start:
9385
      iobj.admin_up = True
9386
      self.cfg.Update(iobj, feedback_fn)
9387
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9388
      feedback_fn("* starting instance...")
9389
      result = self.rpc.call_instance_start(pnode_name, (iobj, None, None),
9390
                                            False)
9391
      result.Raise("Could not start instance")
9392

    
9393
    return list(iobj.all_nodes)
9394

    
9395

    
9396
class LUInstanceConsole(NoHooksLU):
9397
  """Connect to an instance's console.
9398

9399
  This is somewhat special in that it returns the command line that
9400
  you need to run on the master node in order to connect to the
9401
  console.
9402

9403
  """
9404
  REQ_BGL = False
9405

    
9406
  def ExpandNames(self):
9407
    self.share_locks = _ShareAll()
9408
    self._ExpandAndLockInstance()
9409

    
9410
  def CheckPrereq(self):
9411
    """Check prerequisites.
9412

9413
    This checks that the instance is in the cluster.
9414

9415
    """
9416
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9417
    assert self.instance is not None, \
9418
      "Cannot retrieve locked instance %s" % self.op.instance_name
9419
    _CheckNodeOnline(self, self.instance.primary_node)
9420

    
9421
  def Exec(self, feedback_fn):
9422
    """Connect to the console of an instance
9423

9424
    """
9425
    instance = self.instance
9426
    node = instance.primary_node
9427

    
9428
    node_insts = self.rpc.call_instance_list([node],
9429
                                             [instance.hypervisor])[node]
9430
    node_insts.Raise("Can't get node information from %s" % node)
9431

    
9432
    if instance.name not in node_insts.payload:
9433
      if instance.admin_up:
9434
        state = constants.INSTST_ERRORDOWN
9435
      else:
9436
        state = constants.INSTST_ADMINDOWN
9437
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9438
                               (instance.name, state))
9439

    
9440
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9441

    
9442
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9443

    
9444

    
9445
def _GetInstanceConsole(cluster, instance):
9446
  """Returns console information for an instance.
9447

9448
  @type cluster: L{objects.Cluster}
9449
  @type instance: L{objects.Instance}
9450
  @rtype: dict
9451

9452
  """
9453
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
9454
  # beparams and hvparams are passed separately, to avoid editing the
9455
  # instance and then saving the defaults in the instance itself.
9456
  hvparams = cluster.FillHV(instance)
9457
  beparams = cluster.FillBE(instance)
9458
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9459

    
9460
  assert console.instance == instance.name
9461
  assert console.Validate()
9462

    
9463
  return console.ToDict()
9464

    
9465

    
9466
class LUInstanceReplaceDisks(LogicalUnit):
9467
  """Replace the disks of an instance.
9468

9469
  """
9470
  HPATH = "mirrors-replace"
9471
  HTYPE = constants.HTYPE_INSTANCE
9472
  REQ_BGL = False
9473

    
9474
  def CheckArguments(self):
9475
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9476
                                  self.op.iallocator)
9477

    
9478
  def ExpandNames(self):
9479
    self._ExpandAndLockInstance()
9480

    
9481
    assert locking.LEVEL_NODE not in self.needed_locks
9482
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9483

    
9484
    assert self.op.iallocator is None or self.op.remote_node is None, \
9485
      "Conflicting options"
9486

    
9487
    if self.op.remote_node is not None:
9488
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9489

    
9490
      # Warning: do not remove the locking of the new secondary here
9491
      # unless DRBD8.AddChildren is changed to work in parallel;
9492
      # currently it doesn't since parallel invocations of
9493
      # FindUnusedMinor will conflict
9494
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9495
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9496
    else:
9497
      self.needed_locks[locking.LEVEL_NODE] = []
9498
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9499

    
9500
      if self.op.iallocator is not None:
9501
        # iallocator will select a new node in the same group
9502
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9503

    
9504
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9505
                                   self.op.iallocator, self.op.remote_node,
9506
                                   self.op.disks, False, self.op.early_release)
9507

    
9508
    self.tasklets = [self.replacer]
9509

    
9510
  def DeclareLocks(self, level):
9511
    if level == locking.LEVEL_NODEGROUP:
9512
      assert self.op.remote_node is None
9513
      assert self.op.iallocator is not None
9514
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9515

    
9516
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9517
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9518
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9519

    
9520
    elif level == locking.LEVEL_NODE:
9521
      if self.op.iallocator is not None:
9522
        assert self.op.remote_node is None
9523
        assert not self.needed_locks[locking.LEVEL_NODE]
9524

    
9525
        # Lock member nodes of all locked groups
9526
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9527
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9528
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9529
      else:
9530
        self._LockInstancesNodes()
9531

    
9532
  def BuildHooksEnv(self):
9533
    """Build hooks env.
9534

9535
    This runs on the master, the primary and all the secondaries.
9536

9537
    """
9538
    instance = self.replacer.instance
9539
    env = {
9540
      "MODE": self.op.mode,
9541
      "NEW_SECONDARY": self.op.remote_node,
9542
      "OLD_SECONDARY": instance.secondary_nodes[0],
9543
      }
9544
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9545
    return env
9546

    
9547
  def BuildHooksNodes(self):
9548
    """Build hooks nodes.
9549

9550
    """
9551
    instance = self.replacer.instance
9552
    nl = [
9553
      self.cfg.GetMasterNode(),
9554
      instance.primary_node,
9555
      ]
9556
    if self.op.remote_node is not None:
9557
      nl.append(self.op.remote_node)
9558
    return nl, nl
9559

    
9560
  def CheckPrereq(self):
9561
    """Check prerequisites.
9562

9563
    """
9564
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9565
            self.op.iallocator is None)
9566

    
9567
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9568
    if owned_groups:
9569
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9570

    
9571
    return LogicalUnit.CheckPrereq(self)
9572

    
9573

    
9574
class TLReplaceDisks(Tasklet):
9575
  """Replaces disks for an instance.
9576

9577
  Note: Locking is not within the scope of this class.
9578

9579
  """
9580
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9581
               disks, delay_iallocator, early_release):
9582
    """Initializes this class.
9583

9584
    """
9585
    Tasklet.__init__(self, lu)
9586

    
9587
    # Parameters
9588
    self.instance_name = instance_name
9589
    self.mode = mode
9590
    self.iallocator_name = iallocator_name
9591
    self.remote_node = remote_node
9592
    self.disks = disks
9593
    self.delay_iallocator = delay_iallocator
9594
    self.early_release = early_release
9595

    
9596
    # Runtime data
9597
    self.instance = None
9598
    self.new_node = None
9599
    self.target_node = None
9600
    self.other_node = None
9601
    self.remote_node_info = None
9602
    self.node_secondary_ip = None
9603

    
9604
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

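  # Illustrative call patterns accepted by CheckArguments above (node and
  # iallocator names are hypothetical): when changing the secondary exactly
  # one source for the new node must be given, e.g.
  #   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, "node3", None)
  #   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")
  # while for the other modes both must be left unset:
  #   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_PRI, None, None)
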
  @staticmethod
9627
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9628
    """Compute a new secondary node using an IAllocator.
9629

9630
    """
9631
    ial = IAllocator(lu.cfg, lu.rpc,
9632
                     mode=constants.IALLOCATOR_MODE_RELOC,
9633
                     name=instance_name,
9634
                     relocate_from=list(relocate_from))
9635

    
9636
    ial.Run(iallocator_name)
9637

    
9638
    if not ial.success:
9639
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9640
                                 " %s" % (iallocator_name, ial.info),
9641
                                 errors.ECODE_NORES)
9642

    
9643
    if len(ial.result) != ial.required_nodes:
9644
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9645
                                 " of nodes (%s), required %s" %
9646
                                 (iallocator_name,
9647
                                  len(ial.result), ial.required_nodes),
9648
                                 errors.ECODE_FAULT)
9649

    
9650
    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

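  # The request built by _RunAllocator above is a relocation
  # (IALLOCATOR_MODE_RELOC) of the named instance away from its current
  # secondary; on success ial.result is a list of node names and its first
  # entry is used as the new secondary.
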
  def _FindFaultyDisks(self, node_name):
9658
    """Wrapper for L{_FindFaultyInstanceDisks}.
9659

9660
    """
9661
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9662
                                    node_name, True)
9663

    
9664
  def _CheckDisksActivated(self, instance):
9665
    """Checks if the instance disks are activated.
9666

9667
    @param instance: The instance to check disks
9668
    @return: True if they are activated, False otherwise
9669

9670
    """
9671
    nodes = instance.all_nodes
9672

    
9673
    for idx, dev in enumerate(instance.disks):
9674
      for node in nodes:
9675
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9676
        self.cfg.SetDiskID(dev, node)
9677

    
9678
        result = self.rpc.call_blockdev_find(node, dev)
9679

    
9680
        if result.offline:
9681
          continue
9682
        elif result.fail_msg or not result.payload:
9683
          return False
9684

    
9685
    return True
9686

    
9687
  def CheckPrereq(self):
9688
    """Check prerequisites.
9689

9690
    This checks that the instance is in the cluster.
9691

9692
    """
9693
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9694
    assert instance is not None, \
9695
      "Cannot retrieve locked instance %s" % self.instance_name
9696

    
9697
    if instance.disk_template != constants.DT_DRBD8:
9698
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9699
                                 " instances", errors.ECODE_INVAL)
9700

    
9701
    if len(instance.secondary_nodes) != 1:
9702
      raise errors.OpPrereqError("The instance has a strange layout,"
9703
                                 " expected one secondary but found %d" %
9704
                                 len(instance.secondary_nodes),
9705
                                 errors.ECODE_FAULT)
9706

    
9707
    if not self.delay_iallocator:
9708
      self._CheckPrereq2()
9709

    
9710
  def _CheckPrereq2(self):
9711
    """Check prerequisites, second part.
9712

9713
    This function should always be part of CheckPrereq. It was separated and is
9714
    now called from Exec because during node evacuation iallocator was only
9715
    called with an unmodified cluster model, not taking planned changes into
9716
    account.
9717

9718
    """
9719
    instance = self.instance
9720
    secondary_node = instance.secondary_nodes[0]
9721

    
9722
    if self.iallocator_name is None:
9723
      remote_node = self.remote_node
9724
    else:
9725
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9726
                                       instance.name, instance.secondary_nodes)
9727

    
9728
    if remote_node is None:
9729
      self.remote_node_info = None
9730
    else:
9731
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9732
             "Remote node '%s' is not locked" % remote_node
9733

    
9734
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9735
      assert self.remote_node_info is not None, \
9736
        "Cannot retrieve locked node %s" % remote_node
9737

    
9738
    if remote_node == self.instance.primary_node:
9739
      raise errors.OpPrereqError("The specified node is the primary node of"
9740
                                 " the instance", errors.ECODE_INVAL)
9741

    
9742
    if remote_node == secondary_node:
9743
      raise errors.OpPrereqError("The specified node is already the"
9744
                                 " secondary node of the instance",
9745
                                 errors.ECODE_INVAL)
9746

    
9747
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9748
                                    constants.REPLACE_DISK_CHG):
9749
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9750
                                 errors.ECODE_INVAL)
9751

    
9752
    if self.mode == constants.REPLACE_DISK_AUTO:
9753
      if not self._CheckDisksActivated(instance):
9754
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9755
                                   " first" % self.instance_name,
9756
                                   errors.ECODE_STATE)
9757
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9758
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9759

    
9760
      if faulty_primary and faulty_secondary:
9761
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9762
                                   " one node and can not be repaired"
9763
                                   " automatically" % self.instance_name,
9764
                                   errors.ECODE_STATE)
9765

    
9766
      if faulty_primary:
9767
        self.disks = faulty_primary
9768
        self.target_node = instance.primary_node
9769
        self.other_node = secondary_node
9770
        check_nodes = [self.target_node, self.other_node]
9771
      elif faulty_secondary:
9772
        self.disks = faulty_secondary
9773
        self.target_node = secondary_node
9774
        self.other_node = instance.primary_node
9775
        check_nodes = [self.target_node, self.other_node]
9776
      else:
9777
        self.disks = []
9778
        check_nodes = []
9779

    
9780
    else:
9781
      # Non-automatic modes
9782
      if self.mode == constants.REPLACE_DISK_PRI:
9783
        self.target_node = instance.primary_node
9784
        self.other_node = secondary_node
9785
        check_nodes = [self.target_node, self.other_node]
9786

    
9787
      elif self.mode == constants.REPLACE_DISK_SEC:
9788
        self.target_node = secondary_node
9789
        self.other_node = instance.primary_node
9790
        check_nodes = [self.target_node, self.other_node]
9791

    
9792
      elif self.mode == constants.REPLACE_DISK_CHG:
9793
        self.new_node = remote_node
9794
        self.other_node = instance.primary_node
9795
        self.target_node = secondary_node
9796
        check_nodes = [self.new_node, self.other_node]
9797

    
9798
        _CheckNodeNotDrained(self.lu, remote_node)
9799
        _CheckNodeVmCapable(self.lu, remote_node)
9800

    
9801
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9802
        assert old_node_info is not None
9803
        if old_node_info.offline and not self.early_release:
9804
          # doesn't make sense to delay the release
9805
          self.early_release = True
9806
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9807
                          " early-release mode", secondary_node)
9808

    
9809
      else:
9810
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9811
                                     self.mode)
9812

    
9813
      # If not specified all disks should be replaced
9814
      if not self.disks:
9815
        self.disks = range(len(self.instance.disks))
9816

    
9817
    for node in check_nodes:
9818
      _CheckNodeOnline(self.lu, node)
9819

    
9820
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9821
                                                          self.other_node,
9822
                                                          self.target_node]
9823
                              if node_name is not None)
9824

    
9825
    # Release unneeded node locks
9826
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
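    # From here on only the nodes actually involved in the replacement
    # (primary/secondary and, when changing the secondary, the new node) are
    # needed, so any other node locks acquired via the group can be dropped.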
9827

    
9828
    # Release any owned node group
9829
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9830
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9831

    
9832
    # Check whether disks are valid
9833
    for disk_idx in self.disks:
9834
      instance.FindDisk(disk_idx)
9835

    
9836
    # Get secondary node IP addresses
9837
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9838
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9839

    
9840
  def Exec(self, feedback_fn):
9841
    """Execute disk replacement.
9842

9843
    This dispatches the disk replacement to the appropriate handler.
9844

9845
    """
9846
    if self.delay_iallocator:
9847
      self._CheckPrereq2()
9848

    
9849
    if __debug__:
9850
      # Verify owned locks before starting operation
9851
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9852
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9853
          ("Incorrect node locks, owning %s, expected %s" %
9854
           (owned_nodes, self.node_secondary_ip.keys()))
9855

    
9856
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9857
      assert list(owned_instances) == [self.instance_name], \
9858
          "Instance '%s' not locked" % self.instance_name
9859

    
9860
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9861
          "Should not own any node group lock at this point"
9862

    
9863
    if not self.disks:
9864
      feedback_fn("No disks need replacement")
9865
      return
9866

    
9867
    feedback_fn("Replacing disk(s) %s for %s" %
9868
                (utils.CommaJoin(self.disks), self.instance.name))
9869

    
9870
    activate_disks = (not self.instance.admin_up)
9871

    
9872
    # Activate the instance disks if we're replacing them on a down instance
9873
    if activate_disks:
9874
      _StartInstanceDisks(self.lu, self.instance, True)
9875

    
9876
    try:
9877
      # Should we replace the secondary node?
9878
      if self.new_node is not None:
9879
        fn = self._ExecDrbd8Secondary
9880
      else:
9881
        fn = self._ExecDrbd8DiskOnly
9882

    
9883
      result = fn(feedback_fn)
9884
    finally:
9885
      # Deactivate the instance disks if we're replacing them on a
9886
      # down instance
9887
      if activate_disks:
9888
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9889

    
9890
    if __debug__:
9891
      # Verify owned locks
9892
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9893
      nodes = frozenset(self.node_secondary_ip)
9894
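      # With early_release the _ExecDrbd8* helpers have already given up the
      # node locks themselves; otherwise whatever is still owned must be a
      # subset of the touched nodes recorded in _CheckPrereq2.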
      assert ((self.early_release and not owned_nodes) or
9895
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9896
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9897
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9898

    
9899
    return result
9900

    
9901
  def _CheckVolumeGroup(self, nodes):
9902
    self.lu.LogInfo("Checking volume groups")
9903

    
9904
    vgname = self.cfg.GetVGName()
9905

    
9906
    # Make sure volume group exists on all involved nodes
9907
    results = self.rpc.call_vg_list(nodes)
9908
    if not results:
9909
      raise errors.OpExecError("Can't list volume groups on the nodes")
9910

    
9911
    for node in nodes:
9912
      res = results[node]
9913
      res.Raise("Error checking node %s" % node)
9914
      if vgname not in res.payload:
9915
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9916
                                 (vgname, node))
9917

    
9918
  def _CheckDisksExistence(self, nodes):
9919
    # Check disk existence
9920
    for idx, dev in enumerate(self.instance.disks):
9921
      if idx not in self.disks:
9922
        continue
9923

    
9924
      for node in nodes:
9925
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9926
        self.cfg.SetDiskID(dev, node)
9927

    
9928
        result = self.rpc.call_blockdev_find(node, dev)
9929

    
9930
        msg = result.fail_msg
9931
        if msg or not result.payload:
9932
          if not msg:
9933
            msg = "disk not found"
9934
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9935
                                   (idx, node, msg))
9936

    
9937
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9938
    for idx, dev in enumerate(self.instance.disks):
9939
      if idx not in self.disks:
9940
        continue
9941

    
9942
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9943
                      (idx, node_name))
9944

    
9945
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9946
                                   ldisk=ldisk):
9947
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9948
                                 " replace disks for instance %s" %
9949
                                 (node_name, self.instance.name))
9950

    
9951
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
9958
    iv_names = {}
9959

    
9960
    for idx, dev in enumerate(self.instance.disks):
9961
      if idx not in self.disks:
9962
        continue
9963

    
9964
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9965

    
9966
      self.cfg.SetDiskID(dev, node_name)
9967

    
9968
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9969
      names = _GenerateUniqueNames(self.lu, lv_names)
9970

    
9971
      vg_data = dev.children[0].logical_id[0]
9972
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9973
                             logical_id=(vg_data, names[0]))
9974
      vg_meta = dev.children[1].logical_id[0]
9975
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=DRBD_META_SIZE,
9976
                             logical_id=(vg_meta, names[1]))
9977

    
9978
      new_lvs = [lv_data, lv_meta]
9979
      old_lvs = [child.Copy() for child in dev.children]
9980
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9981

    
9982
      # we pass force_create=True to force the LVM creation
9983
      for new_lv in new_lvs:
9984
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9985
                        _GetInstanceInfoText(self.instance), False)
9986

    
9987
    return iv_names
9988

    
9989
  def _CheckDevices(self, node_name, iv_names):
9990
    for name, (dev, _, _) in iv_names.iteritems():
9991
      self.cfg.SetDiskID(dev, node_name)
9992

    
9993
      result = self.rpc.call_blockdev_find(node_name, dev)
9994

    
9995
      msg = result.fail_msg
9996
      if msg or not result.payload:
9997
        if not msg:
9998
          msg = "disk not found"
9999
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
10000
                                 (name, msg))
10001

    
10002
      if result.payload.is_degraded:
10003
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
10004

    
10005
  def _RemoveOldStorage(self, node_name, iv_names):
10006
    for name, (_, old_lvs, _) in iv_names.iteritems():
10007
      self.lu.LogInfo("Remove logical volumes for %s" % name)
10008

    
10009
      for lv in old_lvs:
10010
        self.cfg.SetDiskID(lv, node_name)
10011

    
10012
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
10013
        if msg:
10014
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
10015
                             hint="remove unused LVs manually")
10016

    
10017
  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable=W0613
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced-<time_t>)

    Failures are not very well handled.

    """
10039
    steps_total = 6
10040

    
10041
    # Step: check device activation
10042
    self.lu.LogStep(1, steps_total, "Check device existence")
10043
    self._CheckDisksExistence([self.other_node, self.target_node])
10044
    self._CheckVolumeGroup([self.target_node, self.other_node])
10045

    
10046
    # Step: check other node consistency
10047
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10048
    self._CheckDisksConsistency(self.other_node,
10049
                                self.other_node == self.instance.primary_node,
10050
                                False)
10051

    
10052
    # Step: create new storage
10053
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10054
    iv_names = self._CreateNewStorage(self.target_node)
10055

    
10056
    # Step: for each lv, detach+rename*2+attach
10057
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10058
    for dev, old_lvs, new_lvs in iv_names.itervalues():
10059
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
10060

    
10061
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
10062
                                                     old_lvs)
10063
      result.Raise("Can't detach drbd from local storage on node"
10064
                   " %s for device %s" % (self.target_node, dev.iv_name))
10065
      #dev.children = []
10066
      #cfg.Update(instance)
10067

    
10068
      # ok, we created the new LVs, so now we know we have the needed
10069
      # storage; as such, we proceed on the target node to rename
10070
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
10071
      # using the assumption that logical_id == physical_id (which in
10072
      # turn is the unique_id on that node)
10073

    
10074
      # FIXME(iustin): use a better name for the replaced LVs
10075
      temp_suffix = int(time.time())
10076
      ren_fn = lambda d, suff: (d.physical_id[0],
10077
                                d.physical_id[1] + "_replaced-%s" % suff)
10078

    
10079
      # Build the rename list based on what LVs exist on the node
10080
      rename_old_to_new = []
10081
      for to_ren in old_lvs:
10082
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
10083
        if not result.fail_msg and result.payload:
10084
          # device exists
10085
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
10086

    
10087
      self.lu.LogInfo("Renaming the old LVs on the target node")
10088
      result = self.rpc.call_blockdev_rename(self.target_node,
10089
                                             rename_old_to_new)
10090
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
10091

    
10092
      # Now we rename the new LVs to the old LVs
10093
      self.lu.LogInfo("Renaming the new LVs on the target node")
10094
      rename_new_to_old = [(new, old.physical_id)
10095
                           for old, new in zip(old_lvs, new_lvs)]
10096
      result = self.rpc.call_blockdev_rename(self.target_node,
10097
                                             rename_new_to_old)
10098
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
10099

    
10100
      # Intermediate steps of in memory modifications
10101
      for old, new in zip(old_lvs, new_lvs):
10102
        new.logical_id = old.logical_id
10103
        self.cfg.SetDiskID(new, self.target_node)
10104

    
10105
      # We need to modify old_lvs so that removal later removes the
10106
      # right LVs, not the newly added ones; note that old_lvs is a
10107
      # copy here
10108
      for disk in old_lvs:
10109
        disk.logical_id = ren_fn(disk, temp_suffix)
10110
        self.cfg.SetDiskID(disk, self.target_node)
10111

    
10112
      # Now that the new lvs have the old name, we can add them to the device
10113
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
10114
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
10115
                                                  new_lvs)
10116
      msg = result.fail_msg
10117
      if msg:
10118
        for new_lv in new_lvs:
10119
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
10120
                                               new_lv).fail_msg
10121
          if msg2:
10122
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
10123
                               hint=("cleanup manually the unused logical"
10124
                                     "volumes"))
10125
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
10126

    
10127
    cstep = 5
10128
    if self.early_release:
10129
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10130
      cstep += 1
10131
      self._RemoveOldStorage(self.target_node, iv_names)
10132
      # WARNING: we release both node locks here, do not do other RPCs
10133
      # than WaitForSync to the primary node
10134
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10135
                    names=[self.target_node, self.other_node])
10136

    
10137
    # Wait for sync
10138
    # This can fail as the old devices are degraded and _WaitForSync
10139
    # does a combined result over all disks, so we don't check its return value
10140
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10141
    cstep += 1
10142
    _WaitForSync(self.lu, self.instance)
10143

    
10144
    # Check all devices manually
10145
    self._CheckDevices(self.instance.primary_node, iv_names)
10146

    
10147
    # Step: remove old storage
10148
    if not self.early_release:
10149
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10150
      cstep += 1
10151
      self._RemoveOldStorage(self.target_node, iv_names)
10152

    
10153
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
10172
    steps_total = 6
10173

    
10174
    pnode = self.instance.primary_node
10175

    
10176
    # Step: check device activation
10177
    self.lu.LogStep(1, steps_total, "Check device existence")
10178
    self._CheckDisksExistence([self.instance.primary_node])
10179
    self._CheckVolumeGroup([self.instance.primary_node])
10180

    
10181
    # Step: check other node consistency
10182
    self.lu.LogStep(2, steps_total, "Check peer consistency")
10183
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
10184

    
10185
    # Step: create new storage
10186
    self.lu.LogStep(3, steps_total, "Allocate new storage")
10187
    for idx, dev in enumerate(self.instance.disks):
10188
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
10189
                      (self.new_node, idx))
10190
      # we pass force_create=True to force LVM creation
10191
      for new_lv in dev.children:
10192
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
10193
                        _GetInstanceInfoText(self.instance), False)
10194

    
10195
    # Step 4: drbd minors and drbd setup changes
10196
    # after this, we must manually remove the drbd minors on both the
10197
    # error and the success paths
10198
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
10199
    minors = self.cfg.AllocateDRBDMinor([self.new_node
10200
                                         for dev in self.instance.disks],
10201
                                        self.instance.name)
10202
    logging.debug("Allocated minors %r", minors)
10203

    
10204
    iv_names = {}
10205
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
10206
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
10207
                      (self.new_node, idx))
10208
      # create new devices on new_node; note that we create two IDs:
10209
      # one without port, so the drbd will be activated without
10210
      # networking information on the new node at this stage, and one
10211
      # with network, for the latter activation in step 4
10212
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
10213
      if self.instance.primary_node == o_node1:
10214
        p_minor = o_minor1
10215
      else:
10216
        assert self.instance.primary_node == o_node2, "Three-node instance?"
10217
        p_minor = o_minor2
10218

    
10219
      new_alone_id = (self.instance.primary_node, self.new_node, None,
10220
                      p_minor, new_minor, o_secret)
10221
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
10222
                    p_minor, new_minor, o_secret)
10223

    
10224
      iv_names[idx] = (dev, dev.children, new_net_id)
10225
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
10226
                    new_net_id)
10227
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
10228
                              logical_id=new_alone_id,
10229
                              children=dev.children,
10230
                              size=dev.size)
10231
      try:
10232
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
10233
                              _GetInstanceInfoText(self.instance), False)
10234
      except errors.GenericError:
10235
        self.cfg.ReleaseDRBDMinors(self.instance.name)
10236
        raise
10237

    
10238
    # We have new devices, shutdown the drbd on the old secondary
10239
    for idx, dev in enumerate(self.instance.disks):
10240
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
10241
      self.cfg.SetDiskID(dev, self.target_node)
10242
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
10243
      if msg:
10244
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
10245
                           "node: %s" % (idx, msg),
10246
                           hint=("Please cleanup this device manually as"
10247
                                 " soon as possible"))
10248

    
10249
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
10250
    result = self.rpc.call_drbd_disconnect_net([pnode], self.node_secondary_ip,
10251
                                               self.instance.disks)[pnode]
10252

    
10253
    msg = result.fail_msg
10254
    if msg:
10255
      # detaches didn't succeed (unlikely)
10256
      self.cfg.ReleaseDRBDMinors(self.instance.name)
10257
      raise errors.OpExecError("Can't detach the disks from the network on"
10258
                               " old node: %s" % (msg,))
10259

    
10260
    # if we managed to detach at least one, we update all the disks of
10261
    # the instance to point to the new secondary
10262
    self.lu.LogInfo("Updating instance configuration")
10263
    for dev, _, new_logical_id in iv_names.itervalues():
10264
      dev.logical_id = new_logical_id
10265
      self.cfg.SetDiskID(dev, self.instance.primary_node)
10266

    
10267
    self.cfg.Update(self.instance, feedback_fn)
10268

    
10269
    # and now perform the drbd attach
10270
    self.lu.LogInfo("Attaching primary drbds to new secondary"
10271
                    " (standalone => connected)")
10272
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
10273
                                            self.new_node],
10274
                                           self.node_secondary_ip,
10275
                                           self.instance.disks,
10276
                                           self.instance.name,
10277
                                           False)
10278
    for to_node, to_result in result.items():
10279
      msg = to_result.fail_msg
10280
      if msg:
10281
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
10282
                           to_node, msg,
10283
                           hint=("please do a gnt-instance info to see the"
10284
                                 " status of disks"))
10285
    cstep = 5
10286
    if self.early_release:
10287
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10288
      cstep += 1
10289
      self._RemoveOldStorage(self.target_node, iv_names)
10290
      # WARNING: we release all node locks here, do not do other RPCs
10291
      # than WaitForSync to the primary node
10292
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
10293
                    names=[self.instance.primary_node,
10294
                           self.target_node,
10295
                           self.new_node])
10296

    
10297
    # Wait for sync
10298
    # This can fail as the old devices are degraded and _WaitForSync
10299
    # does a combined result over all disks, so we don't check its return value
10300
    self.lu.LogStep(cstep, steps_total, "Sync devices")
10301
    cstep += 1
10302
    _WaitForSync(self.lu, self.instance)
10303

    
10304
    # Check all devices manually
10305
    self._CheckDevices(self.instance.primary_node, iv_names)
10306

    
10307
    # Step: remove old storage
10308
    if not self.early_release:
10309
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
10310
      self._RemoveOldStorage(self.target_node, iv_names)
10311

    
10312

    
10313
class LURepairNodeStorage(NoHooksLU):
10314
  """Repairs the volume group on a node.
10315

10316
  """
10317
  REQ_BGL = False
10318

    
10319
  def CheckArguments(self):
10320
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10321

    
10322
    storage_type = self.op.storage_type
10323

    
10324
    if (constants.SO_FIX_CONSISTENCY not in
10325
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
10326
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
10327
                                 " repaired" % storage_type,
10328
                                 errors.ECODE_INVAL)
10329

    
10330
  def ExpandNames(self):
10331
    self.needed_locks = {
10332
      locking.LEVEL_NODE: [self.op.node_name],
10333
      }
10334

    
10335
  def _CheckFaultyDisks(self, instance, node_name):
10336
    """Ensure faulty disks abort the opcode or at least warn."""
10337
    try:
10338
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
10339
                                  node_name, True):
10340
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
10341
                                   " node '%s'" % (instance.name, node_name),
10342
                                   errors.ECODE_STATE)
10343
    except errors.OpPrereqError, err:
10344
      if self.op.ignore_consistency:
10345
        self.proc.LogWarning(str(err.args[0]))
10346
      else:
10347
        raise
10348

    
10349
  def CheckPrereq(self):
10350
    """Check prerequisites.
10351

10352
    """
10353
    # Check whether any instance on this node has faulty disks
10354
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10355
      if not inst.admin_up:
10356
        continue
10357
      check_nodes = set(inst.all_nodes)
10358
      check_nodes.discard(self.op.node_name)
10359
      for inst_node_name in check_nodes:
10360
        self._CheckFaultyDisks(inst, inst_node_name)
10361

    
10362
  def Exec(self, feedback_fn):
10363
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10364
                (self.op.name, self.op.node_name))
10365

    
10366
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10367
    result = self.rpc.call_storage_execute(self.op.node_name,
10368
                                           self.op.storage_type, st_args,
10369
                                           self.op.name,
10370
                                           constants.SO_FIX_CONSISTENCY)
10371
    result.Raise("Failed to repair storage unit '%s' on %s" %
10372
                 (self.op.name, self.op.node_name))
10373

    
10374

    
10375
class LUNodeEvacuate(NoHooksLU):
10376
  """Evacuates instances off a list of nodes.
10377

10378
  """
10379
  REQ_BGL = False
10380

    
10381
  def CheckArguments(self):
10382
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10383

    
10384
  def ExpandNames(self):
10385
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10386

    
10387
    if self.op.remote_node is not None:
10388
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10389
      assert self.op.remote_node
10390

    
10391
      if self.op.remote_node == self.op.node_name:
10392
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10393
                                   " secondary node", errors.ECODE_INVAL)
10394

    
10395
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10396
        raise errors.OpPrereqError("Without the use of an iallocator only"
10397
                                   " secondary instances can be evacuated",
10398
                                   errors.ECODE_INVAL)
10399

    
10400
    # Declare locks
10401
    self.share_locks = _ShareAll()
10402
    self.needed_locks = {
10403
      locking.LEVEL_INSTANCE: [],
10404
      locking.LEVEL_NODEGROUP: [],
10405
      locking.LEVEL_NODE: [],
10406
      }
10407

    
10408
    if self.op.remote_node is None:
10409
      # Iallocator will choose any node(s) in the same group
10410
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10411
    else:
10412
      group_nodes = frozenset([self.op.remote_node])
10413

    
10414
    # Determine nodes to be locked
10415
    self.lock_nodes = set([self.op.node_name]) | group_nodes
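    # i.e. the node being evacuated plus either all nodes of its group (when
    # an iallocator will pick the destinations) or just the requested remote
    # node.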
10416

    
10417
  def _DetermineInstances(self):
10418
    """Builds list of instances to operate on.
10419

10420
    """
10421
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10422

    
10423
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10424
      # Primary instances only
10425
      inst_fn = _GetNodePrimaryInstances
10426
      assert self.op.remote_node is None, \
10427
        "Evacuating primary instances requires iallocator"
10428
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10429
      # Secondary instances only
10430
      inst_fn = _GetNodeSecondaryInstances
10431
    else:
10432
      # All instances
10433
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10434
      inst_fn = _GetNodeInstances
10435

    
10436
    return inst_fn(self.cfg, self.op.node_name)
10437

    
10438
  def DeclareLocks(self, level):
10439
    if level == locking.LEVEL_INSTANCE:
10440
      # Lock instances optimistically, needs verification once node and group
10441
      # locks have been acquired
10442
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10443
        set(i.name for i in self._DetermineInstances())
10444

    
10445
    elif level == locking.LEVEL_NODEGROUP:
10446
      # Lock node groups optimistically, needs verification once nodes have
10447
      # been acquired
10448
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10449
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10450

    
10451
    elif level == locking.LEVEL_NODE:
10452
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10453

    
10454
  def CheckPrereq(self):
10455
    # Verify locks
10456
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10457
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10458
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10459

    
10460
    assert owned_nodes == self.lock_nodes
10461

    
10462
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10463
    if owned_groups != wanted_groups:
10464
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10465
                               " current groups are '%s', used to be '%s'" %
10466
                               (utils.CommaJoin(wanted_groups),
10467
                                utils.CommaJoin(owned_groups)))
10468

    
10469
    # Determine affected instances
10470
    self.instances = self._DetermineInstances()
10471
    self.instance_names = [i.name for i in self.instances]
10472

    
10473
    if set(self.instance_names) != owned_instances:
10474
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10475
                               " were acquired, current instances are '%s',"
10476
                               " used to be '%s'" %
10477
                               (self.op.node_name,
10478
                                utils.CommaJoin(self.instance_names),
10479
                                utils.CommaJoin(owned_instances)))
10480

    
10481
    if self.instance_names:
10482
      self.LogInfo("Evacuating instances from node '%s': %s",
10483
                   self.op.node_name,
10484
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10485
    else:
10486
      self.LogInfo("No instances to evacuate from node '%s'",
10487
                   self.op.node_name)
10488

    
10489
    if self.op.remote_node is not None:
10490
      for i in self.instances:
10491
        if i.primary_node == self.op.remote_node:
10492
          raise errors.OpPrereqError("Node %s is the primary node of"
10493
                                     " instance %s, cannot use it as"
10494
                                     " secondary" %
10495
                                     (self.op.remote_node, i.name),
10496
                                     errors.ECODE_INVAL)
10497

    
10498
  def Exec(self, feedback_fn):
10499
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10500

    
10501
    if not self.instance_names:
10502
      # No instances to evacuate
10503
      jobs = []
10504

    
10505
    elif self.op.iallocator is not None:
10506
      # TODO: Implement relocation to other group
10507
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10508
                       evac_mode=self.op.mode,
10509
                       instances=list(self.instance_names))
10510

    
10511
      ial.Run(self.op.iallocator)
10512

    
10513
      if not ial.success:
10514
        raise errors.OpPrereqError("Can't compute node evacuation using"
10515
                                   " iallocator '%s': %s" %
10516
                                   (self.op.iallocator, ial.info),
10517
                                   errors.ECODE_NORES)
10518

    
10519
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10520

    
10521
    elif self.op.remote_node is not None:
10522
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10523
      jobs = [
10524
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10525
                                        remote_node=self.op.remote_node,
10526
                                        disks=[],
10527
                                        mode=constants.REPLACE_DISK_CHG,
10528
                                        early_release=self.op.early_release)]
10529
        for instance_name in self.instance_names
10530
        ]
10531

    
10532
    else:
10533
      raise errors.ProgrammerError("No iallocator or remote node")
10534

    
10535
    return ResultWithJobs(jobs)
10536

    
10537

    
10538
def _SetOpEarlyRelease(early_release, op):
10539
  """Sets C{early_release} flag on opcodes if available.
10540

10541
  """
10542
  try:
10543
    op.early_release = early_release
10544
  except AttributeError:
10545
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10546

    
10547
  return op
10548

    
10549

    
10550
def _NodeEvacDest(use_nodes, group, nodes):
10551
  """Returns group or nodes depending on caller's choice.
10552

10553
  """
10554
  if use_nodes:
10555
    return utils.CommaJoin(nodes)
10556
  else:
10557
    return group
10558

    
10559

    
10560
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
  """Unpacks the result of change-group and node-evacuate iallocator requests.

  Used with iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.

  @type lu: L{LogicalUnit}
  @param lu: Logical unit instance
  @type alloc_result: tuple/list
  @param alloc_result: Result from iallocator
  @type early_release: bool
  @param early_release: Whether to release locks early if possible
  @type use_nodes: bool
  @param use_nodes: Whether to display node names instead of groups

  """
10576
  (moved, failed, jobs) = alloc_result
10577

    
10578
  if failed:
10579
    lu.LogWarning("Unable to evacuate instances %s",
10580
                  utils.CommaJoin("%s (%s)" % (name, reason)
10581
                                  for (name, reason) in failed))
10582

    
10583
  if moved:
10584
    lu.LogInfo("Instances to be moved: %s",
10585
               utils.CommaJoin("%s (to %s)" %
10586
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10587
                               for (name, group, nodes) in moved))
10588

    
10589
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10590
              map(opcodes.OpCode.LoadOpCode, ops))
10591
          for ops in jobs]
10592

    
10593

    
10594
class LUInstanceGrowDisk(LogicalUnit):
10595
  """Grow a disk of an instance.
10596

10597
  """
10598
  HPATH = "disk-grow"
10599
  HTYPE = constants.HTYPE_INSTANCE
10600
  REQ_BGL = False
10601

    
10602
  def ExpandNames(self):
10603
    self._ExpandAndLockInstance()
10604
    self.needed_locks[locking.LEVEL_NODE] = []
10605
    self.needed_locks[locking.LEVEL_NODE_RES] = []
10606
    self.recalculate_locks[locking.LEVEL_NODE_RES] = constants.LOCKS_REPLACE
10607

    
10608
  def DeclareLocks(self, level):
10609
    if level == locking.LEVEL_NODE:
10610
      self._LockInstancesNodes()
10611
    elif level == locking.LEVEL_NODE_RES:
10612
      # Copy node locks
10613
      self.needed_locks[locking.LEVEL_NODE_RES] = \
10614
        self.needed_locks[locking.LEVEL_NODE][:]
10615

    
10616
  def BuildHooksEnv(self):
10617
    """Build hooks env.
10618

10619
    This runs on the master, the primary and all the secondaries.
10620

10621
    """
10622
    env = {
10623
      "DISK": self.op.disk,
10624
      "AMOUNT": self.op.amount,
10625
      }
10626
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10627
    return env
10628

    
10629
  def BuildHooksNodes(self):
10630
    """Build hooks nodes.
10631

10632
    """
10633
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10634
    return (nl, nl)
10635

    
10636
  def CheckPrereq(self):
10637
    """Check prerequisites.
10638

10639
    This checks that the instance is in the cluster.
10640

10641
    """
10642
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10643
    assert instance is not None, \
10644
      "Cannot retrieve locked instance %s" % self.op.instance_name
10645
    nodenames = list(instance.all_nodes)
10646
    for node in nodenames:
10647
      _CheckNodeOnline(self, node)
10648

    
10649
    self.instance = instance
10650

    
10651
    if instance.disk_template not in constants.DTS_GROWABLE:
10652
      raise errors.OpPrereqError("Instance's disk layout does not support"
10653
                                 " growing", errors.ECODE_INVAL)
10654

    
10655
    self.disk = instance.FindDisk(self.op.disk)
10656

    
10657
    if instance.disk_template not in (constants.DT_FILE,
10658
                                      constants.DT_SHARED_FILE):
10659
      # TODO: check the free disk space for file, when that feature will be
10660
      # supported
10661
      _CheckNodesFreeDiskPerVG(self, nodenames,
10662
                               self.disk.ComputeGrowth(self.op.amount))
10663

    
10664
  def Exec(self, feedback_fn):
10665
    """Execute disk grow.
10666

10667
    """
10668
    instance = self.instance
10669
    disk = self.disk
10670

    
10671
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10672
    assert (self.owned_locks(locking.LEVEL_NODE) ==
10673
            self.owned_locks(locking.LEVEL_NODE_RES))
10674

    
10675
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10676
    if not disks_ok:
10677
      raise errors.OpExecError("Cannot activate block device to grow")
10678

    
10679
    feedback_fn("Growing disk %s of instance '%s' by %s" %
10680
                (self.op.disk, instance.name,
10681
                 utils.FormatUnit(self.op.amount, "h")))
10682

    
10683
    # First run all grow ops in dry-run mode
10684
    for node in instance.all_nodes:
10685
      self.cfg.SetDiskID(disk, node)
10686
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10687
      result.Raise("Grow request failed to node %s" % node)
10688

    
10689
    # We know that (as far as we can test) operations across different
10690
    # nodes will succeed, time to run it for real
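    # (The final boolean passed to call_blockdev_grow appears to select the
    # dry-run pass (True above) versus the real resize (False below).)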
10691
    for node in instance.all_nodes:
10692
      self.cfg.SetDiskID(disk, node)
10693
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10694
      result.Raise("Grow request failed to node %s" % node)
10695

    
10696
      # TODO: Rewrite code to work properly
10697
      # DRBD goes into sync mode for a short amount of time after executing the
10698
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10699
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10700
      # time is a work-around.
10701
      time.sleep(5)
10702

    
10703
    disk.RecordGrow(self.op.amount)
10704
    self.cfg.Update(instance, feedback_fn)
10705

    
10706
    # Changes have been recorded, release node lock
10707
    _ReleaseLocks(self, locking.LEVEL_NODE)
10708

    
10709
    # Downgrade lock while waiting for sync
10710
    self.glm.downgrade(locking.LEVEL_INSTANCE)
10711

    
10712
    if self.op.wait_for_sync:
10713
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10714
      if disk_abort:
10715
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10716
                             " status; please check the instance")
10717
      if not instance.admin_up:
10718
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10719
    elif not instance.admin_up:
10720
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10721
                           " not supposed to be running because no wait for"
10722
                           " sync mode was requested")
10723

    
10724
    assert self.owned_locks(locking.LEVEL_NODE_RES)
10725
    assert set([instance.name]) == self.owned_locks(locking.LEVEL_INSTANCE)
10726

    
10727

    
10728
class LUInstanceQueryData(NoHooksLU):
10729
  """Query runtime instance data.
10730

10731
  """
10732
  REQ_BGL = False
10733

    
10734
  def ExpandNames(self):
10735
    self.needed_locks = {}
10736

    
10737
    # Use locking if requested or when non-static information is wanted
10738
    if not (self.op.static or self.op.use_locking):
10739
      self.LogWarning("Non-static data requested, locks need to be acquired")
10740
      self.op.use_locking = True
10741

    
10742
    if self.op.instances or not self.op.use_locking:
10743
      # Expand instance names right here
10744
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10745
    else:
10746
      # Will use acquired locks
10747
      self.wanted_names = None
10748

    
10749
    if self.op.use_locking:
10750
      self.share_locks = _ShareAll()
10751

    
10752
      if self.wanted_names is None:
10753
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10754
      else:
10755
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10756

    
10757
      self.needed_locks[locking.LEVEL_NODE] = []
10758
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10759

    
10760
  def DeclareLocks(self, level):
10761
    if self.op.use_locking and level == locking.LEVEL_NODE:
10762
      self._LockInstancesNodes()
10763

    
10764
  def CheckPrereq(self):
10765
    """Check prerequisites.
10766

10767
    This only checks the optional instance list against the existing names.
10768

10769
    """
10770
    if self.wanted_names is None:
10771
      assert self.op.use_locking, "Locking was not used"
10772
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10773

    
10774
    self.wanted_instances = \
10775
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10776

    
10777
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10778
    """Returns the status of a block device
10779

10780
    """
10781
    if self.op.static or not node:
10782
      return None
10783

    
10784
    self.cfg.SetDiskID(dev, node)
10785

    
10786
    result = self.rpc.call_blockdev_find(node, dev)
10787
    if result.offline:
10788
      return None
10789

    
10790
    result.Raise("Can't compute disk status for %s" % instance_name)
10791

    
10792
    status = result.payload
10793
    if status is None:
10794
      return None
10795

    
10796
    return (status.dev_path, status.major, status.minor,
10797
            status.sync_percent, status.estimated_time,
10798
            status.is_degraded, status.ldisk_status)
10799

    
10800
  def _ComputeDiskStatus(self, instance, snode, dev):
10801
    """Compute block device status.
10802

10803
    """
10804
    if dev.dev_type in constants.LDS_DRBD:
10805
      # we change the snode then (otherwise we use the one passed in)
10806
      if dev.logical_id[0] == instance.primary_node:
10807
        snode = dev.logical_id[1]
10808
      else:
10809
        snode = dev.logical_id[0]
10810

    
10811
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10812
                                              instance.name, dev)
10813
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10814

    
10815
    if dev.children:
10816
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10817
                                        instance, snode),
10818
                         dev.children)
10819
    else:
10820
      dev_children = []
10821

    
10822
    return {
10823
      "iv_name": dev.iv_name,
10824
      "dev_type": dev.dev_type,
10825
      "logical_id": dev.logical_id,
10826
      "physical_id": dev.physical_id,
10827
      "pstatus": dev_pstatus,
10828
      "sstatus": dev_sstatus,
10829
      "children": dev_children,
10830
      "mode": dev.mode,
10831
      "size": dev.size,
10832
      }
10833

    
10834
  def Exec(self, feedback_fn):
10835
    """Gather and return data"""
10836
    result = {}
10837

    
10838
    cluster = self.cfg.GetClusterInfo()
10839

    
10840
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10841
                                          for i in self.wanted_instances)
10842
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10843
      if self.op.static or pnode.offline:
10844
        remote_state = None
10845
        if pnode.offline:
10846
          self.LogWarning("Primary node %s is marked offline, returning static"
10847
                          " information only for instance %s" %
10848
                          (pnode.name, instance.name))
10849
      else:
10850
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10851
                                                  instance.name,
10852
                                                  instance.hypervisor)
10853
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10854
        remote_info = remote_info.payload
10855
        if remote_info and "state" in remote_info:
10856
          remote_state = "up"
10857
        else:
10858
          remote_state = "down"
10859

    
10860
      if instance.admin_up:
10861
        config_state = "up"
10862
      else:
10863
        config_state = "down"
10864

    
10865
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10866
                  instance.disks)
10867

    
10868
      result[instance.name] = {
10869
        "name": instance.name,
10870
        "config_state": config_state,
10871
        "run_state": remote_state,
10872
        "pnode": instance.primary_node,
10873
        "snodes": instance.secondary_nodes,
10874
        "os": instance.os,
10875
        # this happens to be the same format used for hooks
10876
        "nics": _NICListToTuple(self, instance.nics),
10877
        "disk_template": instance.disk_template,
10878
        "disks": disks,
10879
        "hypervisor": instance.hypervisor,
10880
        "network_port": instance.network_port,
10881
        "hv_instance": instance.hvparams,
10882
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10883
        "be_instance": instance.beparams,
10884
        "be_actual": cluster.FillBE(instance),
10885
        "os_instance": instance.osparams,
10886
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10887
        "serial_no": instance.serial_no,
10888
        "mtime": instance.mtime,
10889
        "ctime": instance.ctime,
10890
        "uuid": instance.uuid,
10891
        }
10892

    
10893
    return result
10894

    
10895

    
10896
class LUInstanceSetParams(LogicalUnit):
10897
  """Modifies an instances's parameters.
10898

10899
  """
10900
  HPATH = "instance-modify"
10901
  HTYPE = constants.HTYPE_INSTANCE
10902
  REQ_BGL = False
10903

    
10904
  def CheckArguments(self):
10905
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10906
            self.op.hvparams or self.op.beparams or self.op.os_name):
10907
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10908

    
10909
    if self.op.hvparams:
10910
      _CheckGlobalHvParams(self.op.hvparams)
10911

    
10912
    # Disk validation
10913
    disk_addremove = 0
10914
    for disk_op, disk_dict in self.op.disks:
10915
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10916
      if disk_op == constants.DDM_REMOVE:
10917
        disk_addremove += 1
10918
        continue
10919
      elif disk_op == constants.DDM_ADD:
10920
        disk_addremove += 1
10921
      else:
10922
        if not isinstance(disk_op, int):
10923
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10924
        if not isinstance(disk_dict, dict):
10925
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10926
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10927

    
10928
      if disk_op == constants.DDM_ADD:
10929
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10930
        if mode not in constants.DISK_ACCESS_SET:
10931
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10932
                                     errors.ECODE_INVAL)
10933
        size = disk_dict.get(constants.IDISK_SIZE, None)
10934
        if size is None:
10935
          raise errors.OpPrereqError("Required disk parameter size missing",
10936
                                     errors.ECODE_INVAL)
10937
        try:
10938
          size = int(size)
10939
        except (TypeError, ValueError), err:
10940
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10941
                                     str(err), errors.ECODE_INVAL)
10942
        disk_dict[constants.IDISK_SIZE] = size
10943
      else:
10944
        # modification of disk
10945
        if constants.IDISK_SIZE in disk_dict:
10946
          raise errors.OpPrereqError("Disk size change not possible, use"
10947
                                     " grow-disk", errors.ECODE_INVAL)
10948

    
10949
    if disk_addremove > 1:
10950
      raise errors.OpPrereqError("Only one disk add or remove operation"
10951
                                 " supported at a time", errors.ECODE_INVAL)
10952

    
10953
    if self.op.disks and self.op.disk_template is not None:
10954
      raise errors.OpPrereqError("Disk template conversion and other disk"
10955
                                 " changes not supported at the same time",
10956
                                 errors.ECODE_INVAL)
10957

    
10958
    if (self.op.disk_template and
10959
        self.op.disk_template in constants.DTS_INT_MIRROR and
10960
        self.op.remote_node is None):
10961
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10962
                                 " one requires specifying a secondary node",
10963
                                 errors.ECODE_INVAL)
10964

    
10965
    # NIC validation
10966
    nic_addremove = 0
10967
    for nic_op, nic_dict in self.op.nics:
10968
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10969
      if nic_op == constants.DDM_REMOVE:
10970
        nic_addremove += 1
10971
        continue
10972
      elif nic_op == constants.DDM_ADD:
10973
        nic_addremove += 1
10974
      else:
10975
        if not isinstance(nic_op, int):
10976
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10977
        if not isinstance(nic_dict, dict):
10978
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10979
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10980

    
10981
      # nic_dict should be a dict
10982
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10983
      if nic_ip is not None:
10984
        if nic_ip.lower() == constants.VALUE_NONE:
10985
          nic_dict[constants.INIC_IP] = None
10986
        else:
10987
          if not netutils.IPAddress.IsValid(nic_ip):
10988
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10989
                                       errors.ECODE_INVAL)
10990

    
10991
      nic_bridge = nic_dict.get("bridge", None)
10992
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10993
      if nic_bridge and nic_link:
10994
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10995
                                   " at the same time", errors.ECODE_INVAL)
10996
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10997
        nic_dict["bridge"] = None
10998
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10999
        nic_dict[constants.INIC_LINK] = None
11000

    
11001
      if nic_op == constants.DDM_ADD:
11002
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
11003
        if nic_mac is None:
11004
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
11005

    
11006
      if constants.INIC_MAC in nic_dict:
11007
        nic_mac = nic_dict[constants.INIC_MAC]
11008
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11009
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
11010

    
11011
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
11012
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
11013
                                     " modifying an existing nic",
11014
                                     errors.ECODE_INVAL)
11015

    
11016
    if nic_addremove > 1:
11017
      raise errors.OpPrereqError("Only one NIC add or remove operation"
11018
                                 " supported at a time", errors.ECODE_INVAL)
11019

    
11020
  def ExpandNames(self):
11021
    self._ExpandAndLockInstance()
11022
    # Can't even acquire node locks in shared mode as upcoming changes in
11023
    # Ganeti 2.6 will start to modify the node object on disk conversion
11024
    self.needed_locks[locking.LEVEL_NODE] = []
11025
    self.needed_locks[locking.LEVEL_NODE_RES] = []
11026
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
11027

    
11028
  def DeclareLocks(self, level):
11029
    if level == locking.LEVEL_NODE:
11030
      self._LockInstancesNodes()
11031
      if self.op.disk_template and self.op.remote_node:
11032
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
11033
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
11034
    elif level == locking.LEVEL_NODE_RES and self.op.disk_template:
11035
      # Copy node locks
11036
      self.needed_locks[locking.LEVEL_NODE_RES] = \
11037
        self.needed_locks[locking.LEVEL_NODE][:]
11038

    
11039
  def BuildHooksEnv(self):
11040
    """Build hooks env.
11041

11042
    This runs on the master, primary and secondaries.
11043

11044
    """
11045
    args = dict()
11046
    if constants.BE_MEMORY in self.be_new:
11047
      args["memory"] = self.be_new[constants.BE_MEMORY]
11048
    if constants.BE_VCPUS in self.be_new:
11049
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
11050
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
11051
    # information at all.
11052
    if self.op.nics:
11053
      args["nics"] = []
11054
      nic_override = dict(self.op.nics)
11055
      for idx, nic in enumerate(self.instance.nics):
11056
        if idx in nic_override:
11057
          this_nic_override = nic_override[idx]
11058
        else:
11059
          this_nic_override = {}
11060
        if constants.INIC_IP in this_nic_override:
11061
          ip = this_nic_override[constants.INIC_IP]
11062
        else:
11063
          ip = nic.ip
11064
        if constants.INIC_MAC in this_nic_override:
11065
          mac = this_nic_override[constants.INIC_MAC]
11066
        else:
11067
          mac = nic.mac
11068
        if idx in self.nic_pnew:
11069
          nicparams = self.nic_pnew[idx]
11070
        else:
11071
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
11072
        mode = nicparams[constants.NIC_MODE]
11073
        link = nicparams[constants.NIC_LINK]
11074
        args["nics"].append((ip, mac, mode, link))
11075
      if constants.DDM_ADD in nic_override:
11076
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
11077
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
11078
        nicparams = self.nic_pnew[constants.DDM_ADD]
11079
        mode = nicparams[constants.NIC_MODE]
11080
        link = nicparams[constants.NIC_LINK]
11081
        args["nics"].append((ip, mac, mode, link))
11082
      elif constants.DDM_REMOVE in nic_override:
11083
        del args["nics"][-1]
11084

    
11085
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
11086
    if self.op.disk_template:
11087
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
11088

    
11089
    return env
11090

    
11091
  def BuildHooksNodes(self):
11092
    """Build hooks nodes.
11093

11094
    """
11095
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
11096
    return (nl, nl)
11097

    
11098
  def CheckPrereq(self):
11099
    """Check prerequisites.
11100

11101
    This only checks the instance list against the existing names.
11102

11103
    """
11104
    # checking the new params on the primary/secondary nodes
11105

    
11106
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11107
    cluster = self.cluster = self.cfg.GetClusterInfo()
11108
    assert self.instance is not None, \
11109
      "Cannot retrieve locked instance %s" % self.op.instance_name
11110
    pnode = instance.primary_node
11111
    nodelist = list(instance.all_nodes)
11112

    
11113
    # OS change
11114
    if self.op.os_name and not self.op.force:
11115
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
11116
                      self.op.force_variant)
11117
      instance_os = self.op.os_name
11118
    else:
11119
      instance_os = instance.os
11120

    
11121
    if self.op.disk_template:
11122
      if instance.disk_template == self.op.disk_template:
11123
        raise errors.OpPrereqError("Instance already has disk template %s" %
11124
                                   instance.disk_template, errors.ECODE_INVAL)
11125

    
11126
      if (instance.disk_template,
11127
          self.op.disk_template) not in self._DISK_CONVERSIONS:
11128
        raise errors.OpPrereqError("Unsupported disk template conversion from"
11129
                                   " %s to %s" % (instance.disk_template,
11130
                                                  self.op.disk_template),
11131
                                   errors.ECODE_INVAL)
11132
      _CheckInstanceDown(self, instance, "cannot change disk template")
11133
      if self.op.disk_template in constants.DTS_INT_MIRROR:
11134
        if self.op.remote_node == pnode:
11135
          raise errors.OpPrereqError("Given new secondary node %s is the same"
11136
                                     " as the primary node of the instance" %
11137
                                     self.op.remote_node, errors.ECODE_STATE)
11138
        _CheckNodeOnline(self, self.op.remote_node)
11139
        _CheckNodeNotDrained(self, self.op.remote_node)
11140
        # FIXME: here we assume that the old instance type is DT_PLAIN
11141
        assert instance.disk_template == constants.DT_PLAIN
11142
        disks = [{constants.IDISK_SIZE: d.size,
11143
                  constants.IDISK_VG: d.logical_id[0]}
11144
                 for d in instance.disks]
11145
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
11146
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
11147

    
11148
    # hvparams processing
11149
    if self.op.hvparams:
11150
      hv_type = instance.hypervisor
11151
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
11152
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
11153
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
11154

    
11155
      # local check
11156
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
11157
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
11158
      self.hv_proposed = self.hv_new = hv_new # the new actual values
11159
      self.hv_inst = i_hvdict # the new dict (without defaults)
11160
    else:
11161
      self.hv_proposed = cluster.SimpleFillHV(instance.hypervisor, instance.os,
11162
                                              instance.hvparams)
11163
      self.hv_new = self.hv_inst = {}
11164

    
11165
    # beparams processing
11166
    if self.op.beparams:
11167
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
11168
                                   use_none=True)
11169
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
11170
      be_new = cluster.SimpleFillBE(i_bedict)
11171
      self.be_proposed = self.be_new = be_new # the new actual values
11172
      self.be_inst = i_bedict # the new dict (without defaults)
11173
    else:
11174
      self.be_new = self.be_inst = {}
11175
      self.be_proposed = cluster.SimpleFillBE(instance.beparams)
11176
    be_old = cluster.FillBE(instance)
11177

    
11178
    # CPU param validation -- checking every time a paramtere is
11179
    # changed to cover all cases where either CPU mask or vcpus have
11180
    # changed
11181
    if (constants.BE_VCPUS in self.be_proposed and
11182
        constants.HV_CPU_MASK in self.hv_proposed):
11183
      cpu_list = \
11184
        utils.ParseMultiCpuMask(self.hv_proposed[constants.HV_CPU_MASK])
11185
      # Verify mask is consistent with number of vCPUs. Can skip this
11186
      # test if only 1 entry in the CPU mask, which means same mask
11187
      # is applied to all vCPUs.
11188
      if (len(cpu_list) > 1 and
11189
          len(cpu_list) != self.be_proposed[constants.BE_VCPUS]):
11190
        raise errors.OpPrereqError("Number of vCPUs [%d] does not match the"
11191
                                   " CPU mask [%s]" %
11192
                                   (self.be_proposed[constants.BE_VCPUS],
11193
                                    self.hv_proposed[constants.HV_CPU_MASK]),
11194
                                   errors.ECODE_INVAL)
11195

    
11196
      # Only perform this test if a new CPU mask is given
11197
      if constants.HV_CPU_MASK in self.hv_new:
11198
        # Calculate the largest CPU number requested
11199
        max_requested_cpu = max(map(max, cpu_list))
11200
        # Check that all of the instance's nodes have enough physical CPUs to
11201
        # satisfy the requested CPU mask
11202
        _CheckNodesPhysicalCPUs(self, instance.all_nodes,
11203
                                max_requested_cpu + 1, instance.hypervisor)
11204

    
11205
    # osparams processing
11206
    if self.op.osparams:
11207
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
11208
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
11209
      self.os_inst = i_osdict # the new dict (without defaults)
11210
    else:
11211
      self.os_inst = {}
11212

    
11213
    self.warn = []
11214

    
11215
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
11216
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
11217
      mem_check_list = [pnode]
11218
      if be_new[constants.BE_AUTO_BALANCE]:
11219
        # either we changed auto_balance to yes or it was from before
11220
        mem_check_list.extend(instance.secondary_nodes)
11221
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
11222
                                                  instance.hypervisor)
11223
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
11224
                                         instance.hypervisor)
11225
      pninfo = nodeinfo[pnode]
11226
      msg = pninfo.fail_msg
11227
      if msg:
11228
        # Assume the primary node is unreachable and go ahead
11229
        self.warn.append("Can't get info from primary node %s: %s" %
11230
                         (pnode, msg))
11231
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
11232
        self.warn.append("Node data from primary node %s doesn't contain"
11233
                         " free memory information" % pnode)
11234
      elif instance_info.fail_msg:
11235
        self.warn.append("Can't get instance runtime information: %s" %
11236
                        instance_info.fail_msg)
11237
      else:
11238
        if instance_info.payload:
11239
          current_mem = int(instance_info.payload["memory"])
11240
        else:
11241
          # Assume instance not running
11242
          # (there is a slight race condition here, but it's not very probable,
11243
          # and we have no other way to check)
11244
          current_mem = 0
11245
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
11246
                    pninfo.payload["memory_free"])
11247
        if miss_mem > 0:
11248
          raise errors.OpPrereqError("This change will prevent the instance"
11249
                                     " from starting, due to %d MB of memory"
11250
                                     " missing on its primary node" % miss_mem,
11251
                                     errors.ECODE_NORES)
11252

    
11253
      if be_new[constants.BE_AUTO_BALANCE]:
11254
        for node, nres in nodeinfo.items():
11255
          if node not in instance.secondary_nodes:
11256
            continue
11257
          nres.Raise("Can't get info from secondary node %s" % node,
11258
                     prereq=True, ecode=errors.ECODE_STATE)
11259
          if not isinstance(nres.payload.get("memory_free", None), int):
11260
            raise errors.OpPrereqError("Secondary node %s didn't return free"
11261
                                       " memory information" % node,
11262
                                       errors.ECODE_STATE)
11263
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
11264
            raise errors.OpPrereqError("This change will prevent the instance"
11265
                                       " from failover to its secondary node"
11266
                                       " %s, due to not enough memory" % node,
11267
                                       errors.ECODE_STATE)
11268

    
11269
    # NIC processing
11270
    self.nic_pnew = {}
11271
    self.nic_pinst = {}
11272
    for nic_op, nic_dict in self.op.nics:
11273
      if nic_op == constants.DDM_REMOVE:
11274
        if not instance.nics:
11275
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
11276
                                     errors.ECODE_INVAL)
11277
        continue
11278
      if nic_op != constants.DDM_ADD:
11279
        # an existing nic
11280
        if not instance.nics:
11281
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
11282
                                     " no NICs" % nic_op,
11283
                                     errors.ECODE_INVAL)
11284
        if nic_op < 0 or nic_op >= len(instance.nics):
11285
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
11286
                                     " are 0 to %d" %
11287
                                     (nic_op, len(instance.nics) - 1),
11288
                                     errors.ECODE_INVAL)
11289
        old_nic_params = instance.nics[nic_op].nicparams
11290
        old_nic_ip = instance.nics[nic_op].ip
11291
      else:
11292
        old_nic_params = {}
11293
        old_nic_ip = None
11294

    
11295
      update_params_dict = dict([(key, nic_dict[key])
11296
                                 for key in constants.NICS_PARAMETERS
11297
                                 if key in nic_dict])
11298

    
11299
      if "bridge" in nic_dict:
11300
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
11301

    
11302
      new_nic_params = _GetUpdatedParams(old_nic_params,
11303
                                         update_params_dict)
11304
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
11305
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
11306
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
11307
      self.nic_pinst[nic_op] = new_nic_params
11308
      self.nic_pnew[nic_op] = new_filled_nic_params
11309
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
11310

    
11311
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
11312
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
11313
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
11314
        if msg:
11315
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
11316
          if self.op.force:
11317
            self.warn.append(msg)
11318
          else:
11319
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
11320
      if new_nic_mode == constants.NIC_MODE_ROUTED:
11321
        if constants.INIC_IP in nic_dict:
11322
          nic_ip = nic_dict[constants.INIC_IP]
11323
        else:
11324
          nic_ip = old_nic_ip
11325
        if nic_ip is None:
11326
          raise errors.OpPrereqError("Cannot set the nic ip to None"
11327
                                     " on a routed nic", errors.ECODE_INVAL)
11328
      if constants.INIC_MAC in nic_dict:
11329
        nic_mac = nic_dict[constants.INIC_MAC]
11330
        if nic_mac is None:
11331
          raise errors.OpPrereqError("Cannot set the nic mac to None",
11332
                                     errors.ECODE_INVAL)
11333
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
11334
          # otherwise generate the mac
11335
          nic_dict[constants.INIC_MAC] = \
11336
            self.cfg.GenerateMAC(self.proc.GetECId())
11337
        else:
11338
          # or validate/reserve the current one
11339
          try:
11340
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
11341
          except errors.ReservationError:
11342
            raise errors.OpPrereqError("MAC address %s already in use"
11343
                                       " in cluster" % nic_mac,
11344
                                       errors.ECODE_NOTUNIQUE)
11345

    
11346
    # DISK processing
11347
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
11348
      raise errors.OpPrereqError("Disk operations not supported for"
11349
                                 " diskless instances",
11350
                                 errors.ECODE_INVAL)
11351
    for disk_op, _ in self.op.disks:
11352
      if disk_op == constants.DDM_REMOVE:
11353
        if len(instance.disks) == 1:
11354
          raise errors.OpPrereqError("Cannot remove the last disk of"
11355
                                     " an instance", errors.ECODE_INVAL)
11356
        _CheckInstanceDown(self, instance, "cannot remove disks")
11357

    
11358
      if (disk_op == constants.DDM_ADD and
11359
          len(instance.disks) >= constants.MAX_DISKS):
11360
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
11361
                                   " add more" % constants.MAX_DISKS,
11362
                                   errors.ECODE_STATE)
11363
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
11364
        # an existing disk
11365
        if disk_op < 0 or disk_op >= len(instance.disks):
11366
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
11367
                                     " are 0 to %d" %
11368
                                     (disk_op, len(instance.disks)),
11369
                                     errors.ECODE_INVAL)
11370

    
11371
  def _ConvertPlainToDrbd(self, feedback_fn):
11372
    """Converts an instance from plain to drbd.
11373

11374
    """
11375
    feedback_fn("Converting template to drbd")
11376
    instance = self.instance
11377
    pnode = instance.primary_node
11378
    snode = self.op.remote_node
11379

    
11380
    assert instance.disk_template == constants.DT_PLAIN
11381

    
11382
    # create a fake disk info for _GenerateDiskTemplate
11383
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
11384
                  constants.IDISK_VG: d.logical_id[0]}
11385
                 for d in instance.disks]
11386
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
11387
                                      instance.name, pnode, [snode],
11388
                                      disk_info, None, None, 0, feedback_fn)
11389
    info = _GetInstanceInfoText(instance)
11390
    feedback_fn("Creating aditional volumes...")
11391
    # first, create the missing data and meta devices
11392
    for disk in new_disks:
11393
      # unfortunately this is... not too nice
11394
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
11395
                            info, True)
11396
      for child in disk.children:
11397
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
11398
    # at this stage, all new LVs have been created, we can rename the
11399
    # old ones
11400
    feedback_fn("Renaming original volumes...")
11401
    rename_list = [(o, n.children[0].logical_id)
11402
                   for (o, n) in zip(instance.disks, new_disks)]
11403
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
11404
    result.Raise("Failed to rename original LVs")
11405

    
11406
    feedback_fn("Initializing DRBD devices...")
11407
    # all child devices are in place, we can now create the DRBD devices
11408
    for disk in new_disks:
11409
      for node in [pnode, snode]:
11410
        f_create = node == pnode
11411
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11412

    
11413
    # at this point, the instance has been modified
11414
    instance.disk_template = constants.DT_DRBD8
11415
    instance.disks = new_disks
11416
    self.cfg.Update(instance, feedback_fn)
11417

    
11418
    # Release node locks while waiting for sync
11419
    _ReleaseLocks(self, locking.LEVEL_NODE)
11420

    
11421
    # disks are created, waiting for sync
11422
    disk_abort = not _WaitForSync(self, instance,
11423
                                  oneshot=not self.op.wait_for_sync)
11424
    if disk_abort:
11425
      raise errors.OpExecError("There are some degraded disks for"
11426
                               " this instance, please cleanup manually")
11427

    
11428
    # Node resource locks will be released by caller
11429

    
11430
  def _ConvertDrbdToPlain(self, feedback_fn):
11431
    """Converts an instance from drbd to plain.
11432

11433
    """
11434
    instance = self.instance
11435

    
11436
    assert len(instance.secondary_nodes) == 1
11437
    assert instance.disk_template == constants.DT_DRBD8
11438

    
11439
    pnode = instance.primary_node
11440
    snode = instance.secondary_nodes[0]
11441
    feedback_fn("Converting template to plain")
11442

    
11443
    old_disks = instance.disks
11444
    new_disks = [d.children[0] for d in old_disks]
11445

    
11446
    # copy over size and mode
11447
    for parent, child in zip(old_disks, new_disks):
11448
      child.size = parent.size
11449
      child.mode = parent.mode
11450

    
11451
    # update instance structure
11452
    instance.disks = new_disks
11453
    instance.disk_template = constants.DT_PLAIN
11454
    self.cfg.Update(instance, feedback_fn)
11455

    
11456
    # Release locks in case removing disks takes a while
11457
    _ReleaseLocks(self, locking.LEVEL_NODE)
11458

    
11459
    feedback_fn("Removing volumes on the secondary node...")
11460
    for disk in old_disks:
11461
      self.cfg.SetDiskID(disk, snode)
11462
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11463
      if msg:
11464
        self.LogWarning("Could not remove block device %s on node %s,"
11465
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11466

    
11467
    feedback_fn("Removing unneeded volumes on the primary node...")
11468
    for idx, disk in enumerate(old_disks):
11469
      meta = disk.children[1]
11470
      self.cfg.SetDiskID(meta, pnode)
11471
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11472
      if msg:
11473
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11474
                        " continuing anyway: %s", idx, pnode, msg)
11475

    
11476
    # Node resource locks will be released by caller
11477

    
11478
  def Exec(self, feedback_fn):
11479
    """Modifies an instance.
11480

11481
    All parameters take effect only at the next restart of the instance.
11482

11483
    """
11484
    # Process here the warnings from CheckPrereq, as we don't have a
11485
    # feedback_fn there.
11486
    for warn in self.warn:
11487
      feedback_fn("WARNING: %s" % warn)
11488

    
11489
    assert ((self.op.disk_template is None) ^
11490
            bool(self.owned_locks(locking.LEVEL_NODE_RES))), \
11491
      "Not owning any node resource locks"
11492

    
11493
    result = []
11494
    instance = self.instance
11495
    # disk changes
11496
    for disk_op, disk_dict in self.op.disks:
11497
      if disk_op == constants.DDM_REMOVE:
11498
        # remove the last disk
11499
        device = instance.disks.pop()
11500
        device_idx = len(instance.disks)
11501
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11502
          self.cfg.SetDiskID(disk, node)
11503
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11504
          if msg:
11505
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11506
                            " continuing anyway", device_idx, node, msg)
11507
        result.append(("disk/%d" % device_idx, "remove"))
11508
      elif disk_op == constants.DDM_ADD:
11509
        # add a new disk
11510
        if instance.disk_template in (constants.DT_FILE,
11511
                                        constants.DT_SHARED_FILE):
11512
          file_driver, file_path = instance.disks[0].logical_id
11513
          file_path = os.path.dirname(file_path)
11514
        else:
11515
          file_driver = file_path = None
11516
        disk_idx_base = len(instance.disks)
11517
        new_disk = _GenerateDiskTemplate(self,
11518
                                         instance.disk_template,
11519
                                         instance.name, instance.primary_node,
11520
                                         instance.secondary_nodes,
11521
                                         [disk_dict],
11522
                                         file_path,
11523
                                         file_driver,
11524
                                         disk_idx_base, feedback_fn)[0]
11525
        instance.disks.append(new_disk)
11526
        info = _GetInstanceInfoText(instance)
11527

    
11528
        logging.info("Creating volume %s for instance %s",
11529
                     new_disk.iv_name, instance.name)
11530
        # Note: this needs to be kept in sync with _CreateDisks
11531
        #HARDCODE
11532
        for node in instance.all_nodes:
11533
          f_create = node == instance.primary_node
11534
          try:
11535
            _CreateBlockDev(self, node, instance, new_disk,
11536
                            f_create, info, f_create)
11537
          except errors.OpExecError, err:
11538
            self.LogWarning("Failed to create volume %s (%s) on"
11539
                            " node %s: %s",
11540
                            new_disk.iv_name, new_disk, node, err)
11541
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11542
                       (new_disk.size, new_disk.mode)))
11543
      else:
11544
        # change a given disk
11545
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11546
        result.append(("disk.mode/%d" % disk_op,
11547
                       disk_dict[constants.IDISK_MODE]))
11548

    
11549
    if self.op.disk_template:
11550
      if __debug__:
11551
        check_nodes = set(instance.all_nodes)
11552
        if self.op.remote_node:
11553
          check_nodes.add(self.op.remote_node)
11554
        for level in [locking.LEVEL_NODE, locking.LEVEL_NODE_RES]:
11555
          owned = self.owned_locks(level)
11556
          assert not (check_nodes - owned), \
11557
            ("Not owning the correct locks, owning %r, expected at least %r" %
11558
             (owned, check_nodes))
11559

    
11560
      r_shut = _ShutdownInstanceDisks(self, instance)
11561
      if not r_shut:
11562
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11563
                                 " proceed with disk template conversion")
11564
      mode = (instance.disk_template, self.op.disk_template)
11565
      try:
11566
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11567
      except:
11568
        self.cfg.ReleaseDRBDMinors(instance.name)
11569
        raise
11570
      result.append(("disk_template", self.op.disk_template))
11571

    
11572
      assert instance.disk_template == self.op.disk_template, \
11573
        ("Expected disk template '%s', found '%s'" %
11574
         (self.op.disk_template, instance.disk_template))
11575

    
11576
    # Release node and resource locks if there are any (they might already have
11577
    # been released during disk conversion)
11578
    _ReleaseLocks(self, locking.LEVEL_NODE)
11579
    _ReleaseLocks(self, locking.LEVEL_NODE_RES)
11580

    
11581
    # NIC changes
11582
    for nic_op, nic_dict in self.op.nics:
11583
      if nic_op == constants.DDM_REMOVE:
11584
        # remove the last nic
11585
        del instance.nics[-1]
11586
        result.append(("nic.%d" % len(instance.nics), "remove"))
11587
      elif nic_op == constants.DDM_ADD:
11588
        # mac and bridge should be set, by now
11589
        mac = nic_dict[constants.INIC_MAC]
11590
        ip = nic_dict.get(constants.INIC_IP, None)
11591
        nicparams = self.nic_pinst[constants.DDM_ADD]
11592
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11593
        instance.nics.append(new_nic)
11594
        result.append(("nic.%d" % (len(instance.nics) - 1),
11595
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11596
                       (new_nic.mac, new_nic.ip,
11597
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11598
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11599
                       )))
11600
      else:
11601
        for key in (constants.INIC_MAC, constants.INIC_IP):
11602
          if key in nic_dict:
11603
            setattr(instance.nics[nic_op], key, nic_dict[key])
11604
        if nic_op in self.nic_pinst:
11605
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11606
        for key, val in nic_dict.iteritems():
11607
          result.append(("nic.%s/%d" % (key, nic_op), val))
11608

    
11609
    # hvparams changes
11610
    if self.op.hvparams:
11611
      instance.hvparams = self.hv_inst
11612
      for key, val in self.op.hvparams.iteritems():
11613
        result.append(("hv/%s" % key, val))
11614

    
11615
    # beparams changes
11616
    if self.op.beparams:
11617
      instance.beparams = self.be_inst
11618
      for key, val in self.op.beparams.iteritems():
11619
        result.append(("be/%s" % key, val))
11620

    
11621
    # OS change
11622
    if self.op.os_name:
11623
      instance.os = self.op.os_name
11624

    
11625
    # osparams changes
11626
    if self.op.osparams:
11627
      instance.osparams = self.os_inst
11628
      for key, val in self.op.osparams.iteritems():
11629
        result.append(("os/%s" % key, val))
11630

    
11631
    self.cfg.Update(instance, feedback_fn)
11632

    
11633
    assert not (self.owned_locks(locking.LEVEL_NODE_RES) or
11634
                self.owned_locks(locking.LEVEL_NODE)), \
11635
      "All node locks should have been released by now"
11636

    
11637
    return result
11638

    
11639
  _DISK_CONVERSIONS = {
11640
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11641
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11642
    }
11643

    
11644

    
11645
class LUInstanceChangeGroup(LogicalUnit):
11646
  HPATH = "instance-change-group"
11647
  HTYPE = constants.HTYPE_INSTANCE
11648
  REQ_BGL = False
11649

    
11650
  def ExpandNames(self):
11651
    self.share_locks = _ShareAll()
11652
    self.needed_locks = {
11653
      locking.LEVEL_NODEGROUP: [],
11654
      locking.LEVEL_NODE: [],
11655
      }
11656

    
11657
    self._ExpandAndLockInstance()
11658

    
11659
    if self.op.target_groups:
11660
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11661
                                  self.op.target_groups)
11662
    else:
11663
      self.req_target_uuids = None
11664

    
11665
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11666

    
11667
  def DeclareLocks(self, level):
11668
    if level == locking.LEVEL_NODEGROUP:
11669
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11670

    
11671
      if self.req_target_uuids:
11672
        lock_groups = set(self.req_target_uuids)
11673

    
11674
        # Lock all groups used by instance optimistically; this requires going
11675
        # via the node before it's locked, requiring verification later on
11676
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11677
        lock_groups.update(instance_groups)
11678
      else:
11679
        # No target groups, need to lock all of them
11680
        lock_groups = locking.ALL_SET
11681

    
11682
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11683

    
11684
    elif level == locking.LEVEL_NODE:
11685
      if self.req_target_uuids:
11686
        # Lock all nodes used by instances
11687
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11688
        self._LockInstancesNodes()
11689

    
11690
        # Lock all nodes in all potential target groups
11691
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11692
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11693
        member_nodes = [node_name
11694
                        for group in lock_groups
11695
                        for node_name in self.cfg.GetNodeGroup(group).members]
11696
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11697
      else:
11698
        # Lock all nodes as all groups are potential targets
11699
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11700

    
11701
  def CheckPrereq(self):
11702
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11703
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11704
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11705

    
11706
    assert (self.req_target_uuids is None or
11707
            owned_groups.issuperset(self.req_target_uuids))
11708
    assert owned_instances == set([self.op.instance_name])
11709

    
11710
    # Get instance information
11711
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11712

    
11713
    # Check if node groups for locked instance are still correct
11714
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11715
      ("Instance %s's nodes changed while we kept the lock" %
11716
       self.op.instance_name)
11717

    
11718
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11719
                                           owned_groups)
11720

    
11721
    if self.req_target_uuids:
11722
      # User requested specific target groups
11723
      self.target_uuids = self.req_target_uuids
11724
    else:
11725
      # All groups except those used by the instance are potential targets
11726
      self.target_uuids = owned_groups - inst_groups
11727

    
11728
    conflicting_groups = self.target_uuids & inst_groups
11729
    if conflicting_groups:
11730
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11731
                                 " used by the instance '%s'" %
11732
                                 (utils.CommaJoin(conflicting_groups),
11733
                                  self.op.instance_name),
11734
                                 errors.ECODE_INVAL)
11735

    
11736
    if not self.target_uuids:
11737
      raise errors.OpPrereqError("There are no possible target groups",
11738
                                 errors.ECODE_INVAL)
11739

    
11740
  def BuildHooksEnv(self):
11741
    """Build hooks env.
11742

11743
    """
11744
    assert self.target_uuids
11745

    
11746
    env = {
11747
      "TARGET_GROUPS": " ".join(self.target_uuids),
11748
      }
11749

    
11750
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11751

    
11752
    return env
11753

    
11754
  def BuildHooksNodes(self):
11755
    """Build hooks nodes.
11756

11757
    """
11758
    mn = self.cfg.GetMasterNode()
11759
    return ([mn], [mn])
11760

    
11761
  def Exec(self, feedback_fn):
11762
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11763

    
11764
    assert instances == [self.op.instance_name], "Instance not locked"
11765

    
11766
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11767
                     instances=instances, target_groups=list(self.target_uuids))
11768

    
11769
    ial.Run(self.op.iallocator)
11770

    
11771
    if not ial.success:
11772
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11773
                                 " instance '%s' using iallocator '%s': %s" %
11774
                                 (self.op.instance_name, self.op.iallocator,
11775
                                  ial.info),
11776
                                 errors.ECODE_NORES)
11777

    
11778
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11779

    
11780
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11781
                 " instance '%s'", len(jobs), self.op.instance_name)
11782

    
11783
    return ResultWithJobs(jobs)
11784

    
11785

    
11786
class LUBackupQuery(NoHooksLU):
11787
  """Query the exports list
11788

11789
  """
11790
  REQ_BGL = False
11791

    
11792
  def ExpandNames(self):
11793
    self.needed_locks = {}
11794
    self.share_locks[locking.LEVEL_NODE] = 1
11795
    if not self.op.nodes:
11796
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11797
    else:
11798
      self.needed_locks[locking.LEVEL_NODE] = \
11799
        _GetWantedNodes(self, self.op.nodes)
11800

    
11801
  def Exec(self, feedback_fn):
11802
    """Compute the list of all the exported system images.
11803

11804
    @rtype: dict
11805
    @return: a dictionary with the structure node->(export-list)
11806
        where export-list is a list of the instances exported on
11807
        that node.
11808

11809
    """
11810
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11811
    rpcresult = self.rpc.call_export_list(self.nodes)
11812
    result = {}
11813
    for node in rpcresult:
11814
      if rpcresult[node].fail_msg:
11815
        result[node] = False
11816
      else:
11817
        result[node] = rpcresult[node].payload
11818

    
11819
    return result
11820

    
11821

    
11822
class LUBackupPrepare(NoHooksLU):
11823
  """Prepares an instance for an export and returns useful information.
11824

11825
  """
11826
  REQ_BGL = False
11827

    
11828
  def ExpandNames(self):
11829
    self._ExpandAndLockInstance()
11830

    
11831
  def CheckPrereq(self):
11832
    """Check prerequisites.
11833

11834
    """
11835
    instance_name = self.op.instance_name
11836

    
11837
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11838
    assert self.instance is not None, \
11839
          "Cannot retrieve locked instance %s" % self.op.instance_name
11840
    _CheckNodeOnline(self, self.instance.primary_node)
11841

    
11842
    self._cds = _GetClusterDomainSecret()
11843

    
11844
  def Exec(self, feedback_fn):
11845
    """Prepares an instance for an export.
11846

11847
    """
11848
    instance = self.instance
11849

    
11850
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11851
      salt = utils.GenerateSecret(8)
11852

    
11853
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11854
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11855
                                              constants.RIE_CERT_VALIDITY)
11856
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11857

    
11858
      (name, cert_pem) = result.payload
11859

    
11860
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11861
                                             cert_pem)
11862

    
11863
      return {
11864
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11865
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11866
                          salt),
11867
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11868
        }
11869

    
11870
    return None
11871

    
11872

    
11873
class LUBackupExport(LogicalUnit):
11874
  """Export an instance to an image in the cluster.
11875

11876
  """
11877
  HPATH = "instance-export"
11878
  HTYPE = constants.HTYPE_INSTANCE
11879
  REQ_BGL = False
11880

    
11881
  def CheckArguments(self):
11882
    """Check the arguments.
11883

11884
    """
11885
    self.x509_key_name = self.op.x509_key_name
11886
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11887

    
11888
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11889
      if not self.x509_key_name:
11890
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11891
                                   errors.ECODE_INVAL)
11892

    
11893
      if not self.dest_x509_ca_pem:
11894
        raise errors.OpPrereqError("Missing destination X509 CA",
11895
                                   errors.ECODE_INVAL)
11896

    
11897
  def ExpandNames(self):
11898
    self._ExpandAndLockInstance()
11899

    
11900
    # Lock all nodes for local exports
11901
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11902
      # FIXME: lock only instance primary and destination node
11903
      #
11904
      # Sad but true, for now we have do lock all nodes, as we don't know where
11905
      # the previous export might be, and in this LU we search for it and
11906
      # remove it from its current node. In the future we could fix this by:
11907
      #  - making a tasklet to search (share-lock all), then create the
11908
      #    new one, then one to remove, after
11909
      #  - removing the removal operation altogether
11910
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11911

    
11912
  def DeclareLocks(self, level):
11913
    """Last minute lock declaration."""
11914
    # All nodes are locked anyway, so nothing to do here.
11915

    
11916
  def BuildHooksEnv(self):
11917
    """Build hooks env.
11918

11919
    This will run on the master, primary node and target node.
11920

11921
    """
11922
    env = {
11923
      "EXPORT_MODE": self.op.mode,
11924
      "EXPORT_NODE": self.op.target_node,
11925
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11926
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11927
      # TODO: Generic function for boolean env variables
11928
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11929
      }
11930

    
11931
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11932

    
11933
    return env
11934

    
11935
  def BuildHooksNodes(self):
11936
    """Build hooks nodes.
11937

11938
    """
11939
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11940

    
11941
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11942
      nl.append(self.op.target_node)
11943

    
11944
    return (nl, nl)
11945

    
11946
  def CheckPrereq(self):
11947
    """Check prerequisites.
11948

11949
    This checks that the instance and node names are valid.
11950

11951
    """
11952
    instance_name = self.op.instance_name
11953

    
11954
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11955
    assert self.instance is not None, \
11956
          "Cannot retrieve locked instance %s" % self.op.instance_name
11957
    _CheckNodeOnline(self, self.instance.primary_node)
11958

    
11959
    if (self.op.remove_instance and self.instance.admin_up and
11960
        not self.op.shutdown):
11961
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11962
                                 " down before")
11963

    
11964
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11965
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11966
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11967
      assert self.dst_node is not None
11968

    
11969
      _CheckNodeOnline(self, self.dst_node.name)
11970
      _CheckNodeNotDrained(self, self.dst_node.name)
11971

    
11972
      self._cds = None
11973
      self.dest_disk_info = None
11974
      self.dest_x509_ca = None
11975

    
11976
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11977
      self.dst_node = None
11978

    
11979
      if len(self.op.target_node) != len(self.instance.disks):
11980
        raise errors.OpPrereqError(("Received destination information for %s"
11981
                                    " disks, but instance %s has %s disks") %
11982
                                   (len(self.op.target_node), instance_name,
11983
                                    len(self.instance.disks)),
11984
                                   errors.ECODE_INVAL)
11985

    
11986
      cds = _GetClusterDomainSecret()
11987

    
11988
      # Check X509 key name
11989
      try:
11990
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11991
      except (TypeError, ValueError), err:
11992
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11993

    
11994
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11995
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11996
                                   errors.ECODE_INVAL)
11997

    
11998
      # Load and verify CA
11999
      try:
12000
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
12001
      except OpenSSL.crypto.Error, err:
12002
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
12003
                                   (err, ), errors.ECODE_INVAL)
12004

    
12005
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
12006
      if errcode is not None:
12007
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
12008
                                   (msg, ), errors.ECODE_INVAL)
12009

    
12010
      self.dest_x509_ca = cert
12011

    
12012
      # Verify target information
12013
      disk_info = []
12014
      for idx, disk_data in enumerate(self.op.target_node):
12015
        try:
12016
          (host, port, magic) = \
12017
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
12018
        except errors.GenericError, err:
12019
          raise errors.OpPrereqError("Target info for disk %s: %s" %
12020
                                     (idx, err), errors.ECODE_INVAL)
12021

    
12022
        disk_info.append((host, port, magic))
12023

    
12024
      assert len(disk_info) == len(self.op.target_node)
12025
      self.dest_disk_info = disk_info
12026

    
12027
    else:
12028
      raise errors.ProgrammerError("Unhandled export mode %r" %
12029
                                   self.op.mode)
12030

    
12031
    # instance disk type verification
12032
    # TODO: Implement export support for file-based disks
12033
    for disk in self.instance.disks:
12034
      if disk.dev_type == constants.LD_FILE:
12035
        raise errors.OpPrereqError("Export not supported for instances with"
12036
                                   " file-based disks", errors.ECODE_INVAL)
12037

    
12038
  def _CleanupExports(self, feedback_fn):
12039
    """Removes exports of current instance from all other nodes.
12040

12041
    If an instance in a cluster with nodes A..D was exported to node C, its
12042
    exports will be removed from the nodes A, B and D.
12043

12044
    """
12045
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
12046

    
12047
    nodelist = self.cfg.GetNodeList()
12048
    nodelist.remove(self.dst_node.name)
12049

    
12050
    # on one-node clusters nodelist will be empty after the removal
12051
    # if we proceed the backup would be removed because OpBackupQuery
12052
    # substitutes an empty list with the full cluster node list.
12053
    iname = self.instance.name
12054
    if nodelist:
12055
      feedback_fn("Removing old exports for instance %s" % iname)
12056
      exportlist = self.rpc.call_export_list(nodelist)
12057
      for node in exportlist:
12058
        if exportlist[node].fail_msg:
12059
          continue
12060
        if iname in exportlist[node].payload:
12061
          msg = self.rpc.call_export_remove(node, iname).fail_msg
12062
          if msg:
12063
            self.LogWarning("Could not remove older export for instance %s"
12064
                            " on node %s: %s", iname, node, msg)
12065

    
12066
  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    assert self.op.mode in constants.EXPORT_MODES

    instance = self.instance
    src_node = instance.primary_node

    if self.op.shutdown:
      # shut down the instance, but not the disks
      feedback_fn("Shutting down instance %s" % instance.name)
      result = self.rpc.call_instance_shutdown(src_node, instance,
                                               self.op.shutdown_timeout)
      # TODO: Maybe ignore failures if ignore_remove_failures is set
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    activate_disks = (not instance.admin_up)

    if activate_disks:
      # Activate the instance disks if we're exporting a stopped instance
      feedback_fn("Activating disks for %s" % instance.name)
      _StartInstanceDisks(self, instance, None)

    try:
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
                                                     instance)

      helper.CreateSnapshots()
      try:
        if (self.op.shutdown and instance.admin_up and
            not self.op.remove_instance):
          assert not activate_disks
          feedback_fn("Starting instance %s" % instance.name)
          result = self.rpc.call_instance_start(src_node,
                                                (instance, None, None), False)
          msg = result.fail_msg
          if msg:
            feedback_fn("Failed to start instance: %s" % msg)
            _ShutdownInstanceDisks(self, instance)
            raise errors.OpExecError("Could not start instance: %s" % msg)

        if self.op.mode == constants.EXPORT_MODE_LOCAL:
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

          (key_name, _, _) = self.x509_key_name

          dest_ca_pem = \
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                            self.dest_x509_ca)

          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
                                                     key_name, dest_ca_pem,
                                                     timeouts)
      finally:
        helper.Cleanup()

      # Check for backwards compatibility
      assert len(dresults) == len(instance.disks)
      assert compat.all(isinstance(i, bool) for i in dresults), \
             "Not all results are boolean: %r" % dresults

    finally:
      if activate_disks:
        feedback_fn("Deactivating disks for %s" % instance.name)
        _ShutdownInstanceDisks(self, instance)

    if not (compat.all(dresults) and fin_resu):
      failures = []
      if not fin_resu:
        failures.append("export finalization")
      if not compat.all(dresults):
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
                               if not dsk)
        failures.append("disk export: disk(s) %s" % fdsk)

      raise errors.OpExecError("Export failed, errors in %s" %
                               utils.CommaJoin(failures))

    # At this point the export was successful; we can clean up and finish

    # Remove instance if requested
    if self.op.remove_instance:
      feedback_fn("Removing instance %s" % instance.name)
      _RemoveInstance(self, feedback_fn, instance,
                      self.op.ignore_remove_failures)

    if self.op.mode == constants.EXPORT_MODE_LOCAL:
      self._CleanupExports(feedback_fn)

    return fin_resu, dresults


class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
12216
  """Logical unit for creating node groups.
12217

12218
  """
12219
  HPATH = "group-add"
12220
  HTYPE = constants.HTYPE_GROUP
12221
  REQ_BGL = False
12222

    
12223
  def ExpandNames(self):
12224
    # We need the new group's UUID here so that we can create and acquire the
12225
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
12226
    # that it should not check whether the UUID exists in the configuration.
12227
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
12228
    self.needed_locks = {}
12229
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12230

    
12231
  def CheckPrereq(self):
12232
    """Check prerequisites.
12233

12234
    This checks that the given group name is not an existing node group
12235
    already.
12236

12237
    """
12238
    try:
12239
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12240
    except errors.OpPrereqError:
12241
      pass
12242
    else:
12243
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
12244
                                 " node group (UUID: %s)" %
12245
                                 (self.op.group_name, existing_uuid),
12246
                                 errors.ECODE_EXISTS)
12247

    
12248
    if self.op.ndparams:
12249
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12250

    
12251
  def BuildHooksEnv(self):
12252
    """Build hooks env.
12253

12254
    """
12255
    return {
12256
      "GROUP_NAME": self.op.group_name,
12257
      }
12258

    
12259
  def BuildHooksNodes(self):
12260
    """Build hooks nodes.
12261

12262
    """
12263
    mn = self.cfg.GetMasterNode()
12264
    return ([mn], [mn])
12265

    
12266
  def Exec(self, feedback_fn):
12267
    """Add the node group to the cluster.
12268

12269
    """
12270
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
12271
                                  uuid=self.group_uuid,
12272
                                  alloc_policy=self.op.alloc_policy,
12273
                                  ndparams=self.op.ndparams)
12274

    
12275
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
12276
    del self.remove_locks[locking.LEVEL_NODEGROUP]
12277

    
12278

    
12279
class LUGroupAssignNodes(NoHooksLU):
12280
  """Logical unit for assigning nodes to groups.
12281

12282
  """
12283
  REQ_BGL = False
12284

    
12285
  def ExpandNames(self):
12286
    # These raise errors.OpPrereqError on their own:
12287
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12288
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
12289

    
12290
    # We want to lock all the affected nodes and groups. We have readily
12291
    # available the list of nodes, and the *destination* group. To gather the
12292
    # list of "source" groups, we need to fetch node information later on.
12293
    self.needed_locks = {
12294
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
12295
      locking.LEVEL_NODE: self.op.nodes,
12296
      }
12297

    
12298
  def DeclareLocks(self, level):
12299
    if level == locking.LEVEL_NODEGROUP:
12300
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
12301

    
12302
      # Try to get all affected nodes' groups without having the group or node
12303
      # lock yet. Needs verification later in the code flow.
12304
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
12305

    
12306
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
12307

    
12308
  def CheckPrereq(self):
12309
    """Check prerequisites.
12310

12311
    """
12312
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
12313
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
12314
            frozenset(self.op.nodes))
12315

    
12316
    expected_locks = (set([self.group_uuid]) |
12317
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
12318
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
12319
    if actual_locks != expected_locks:
12320
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
12321
                               " current groups are '%s', used to be '%s'" %
12322
                               (utils.CommaJoin(expected_locks),
12323
                                utils.CommaJoin(actual_locks)))
12324

    
12325
    self.node_data = self.cfg.GetAllNodesInfo()
12326
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12327
    instance_data = self.cfg.GetAllInstancesInfo()
12328

    
12329
    if self.group is None:
12330
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12331
                               (self.op.group_name, self.group_uuid))
12332

    
12333
    (new_splits, previous_splits) = \
12334
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
12335
                                             for node in self.op.nodes],
12336
                                            self.node_data, instance_data)
12337

    
12338
    if new_splits:
12339
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
12340

    
12341
      if not self.op.force:
12342
        raise errors.OpExecError("The following instances get split by this"
12343
                                 " change and --force was not given: %s" %
12344
                                 fmt_new_splits)
12345
      else:
12346
        self.LogWarning("This operation will split the following instances: %s",
12347
                        fmt_new_splits)
12348

    
12349
        if previous_splits:
12350
          self.LogWarning("In addition, these already-split instances continue"
12351
                          " to be split across groups: %s",
12352
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
12353

    
12354
  def Exec(self, feedback_fn):
12355
    """Assign nodes to a new group.
12356

12357
    """
12358
    for node in self.op.nodes:
12359
      self.node_data[node].group = self.group_uuid
12360

    
12361
    # FIXME: Depends on side-effects of modifying the result of
12362
    # C{cfg.GetAllNodesInfo}
12363

    
12364
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
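    # (Per the FIXME above: the node objects modified in the loop are the
    # same objects held in the configuration, so updating the group here is
    # what persists the new node->group assignments as well.)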
12365

    
12366
  @staticmethod
12367
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
12368
    """Check for split instances after a node assignment.
12369

12370
    This method considers a series of node assignments as an atomic operation,
12371
    and returns information about split instances after applying the set of
12372
    changes.
12373

12374
    In particular, it returns information about newly split instances, and
12375
    instances that were already split, and remain so after the change.
12376

12377
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
12378
    considered.
12379

12380
    @type changes: list of (node_name, new_group_uuid) pairs.
12381
    @param changes: list of node assignments to consider.
12382
    @param node_data: a dict with data for all nodes
12383
    @param instance_data: a dict with all instances to consider
12384
    @rtype: a two-tuple
12385
    @return: a list of instances that were previously okay and end up split as
      a consequence of this change, and a list of instances that were already
      split and that this change does not fix.
12388

12389
    """
12390
    changed_nodes = dict((node, group) for node, group in changes
12391
                         if node_data[node].group != group)
12392

    
12393
    all_split_instances = set()
12394
    previously_split_instances = set()
12395

    
12396
    def InstanceNodes(instance):
12397
      return [instance.primary_node] + list(instance.secondary_nodes)
12398

    
12399
    for inst in instance_data.values():
12400
      if inst.disk_template not in constants.DTS_INT_MIRROR:
12401
        continue
12402

    
12403
      instance_nodes = InstanceNodes(inst)
12404

    
12405
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
12406
        previously_split_instances.add(inst.name)
12407

    
12408
      if len(set(changed_nodes.get(node, node_data[node].group)
12409
                 for node in instance_nodes)) > 1:
12410
        all_split_instances.add(inst.name)
12411

    
12412
    return (list(all_split_instances - previously_split_instances),
12413
            list(previously_split_instances & all_split_instances))
12414

    
12415

    
12416
class _GroupQuery(_QueryBase):
12417
  FIELDS = query.GROUP_FIELDS
12418

    
12419
  def ExpandNames(self, lu):
12420
    lu.needed_locks = {}
12421

    
12422
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
12423
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
12424

    
12425
    if not self.names:
12426
      self.wanted = [name_to_uuid[name]
12427
                     for name in utils.NiceSort(name_to_uuid.keys())]
12428
    else:
12429
      # Accept names to be either names or UUIDs.
12430
      missing = []
12431
      self.wanted = []
12432
      all_uuid = frozenset(self._all_groups.keys())
12433

    
12434
      for name in self.names:
12435
        if name in all_uuid:
12436
          self.wanted.append(name)
12437
        elif name in name_to_uuid:
12438
          self.wanted.append(name_to_uuid[name])
12439
        else:
12440
          missing.append(name)
12441

    
12442
      if missing:
12443
        raise errors.OpPrereqError("Some groups do not exist: %s" %
12444
                                   utils.CommaJoin(missing),
12445
                                   errors.ECODE_NOENT)
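      # (Either way, self.wanted now contains group UUIDs only, regardless of
      # whether the caller passed names or UUIDs.)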
12446

    
12447
  def DeclareLocks(self, lu, level):
12448
    pass
12449

    
12450
  def _GetQueryData(self, lu):
12451
    """Computes the list of node groups and their attributes.
12452

12453
    """
12454
    do_nodes = query.GQ_NODE in self.requested_data
12455
    do_instances = query.GQ_INST in self.requested_data
12456

    
12457
    group_to_nodes = None
12458
    group_to_instances = None
12459

    
12460
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12461
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12462
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12463
    # instance->node. Hence, we will need to process nodes even if we only need
12464
    # instance information.
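    # Example (hypothetical names): an instance "inst1" whose primary node
    # "node1" belongs to group "g1" is reported under group_to_instances["g1"]
    # only because node1 was first recorded in node_to_group.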
12465
    if do_nodes or do_instances:
12466
      all_nodes = lu.cfg.GetAllNodesInfo()
12467
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12468
      node_to_group = {}
12469

    
12470
      for node in all_nodes.values():
12471
        if node.group in group_to_nodes:
12472
          group_to_nodes[node.group].append(node.name)
12473
          node_to_group[node.name] = node.group
12474

    
12475
      if do_instances:
12476
        all_instances = lu.cfg.GetAllInstancesInfo()
12477
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12478

    
12479
        for instance in all_instances.values():
12480
          node = instance.primary_node
12481
          if node in node_to_group:
12482
            group_to_instances[node_to_group[node]].append(instance.name)
12483

    
12484
        if not do_nodes:
12485
          # Do not pass on node information if it was not requested.
12486
          group_to_nodes = None
12487

    
12488
    return query.GroupQueryData([self._all_groups[uuid]
12489
                                 for uuid in self.wanted],
12490
                                group_to_nodes, group_to_instances)
12491

    
12492

    
12493
class LUGroupQuery(NoHooksLU):
12494
  """Logical unit for querying node groups.
12495

12496
  """
12497
  REQ_BGL = False
12498

    
12499
  def CheckArguments(self):
12500
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12501
                          self.op.output_fields, False)
12502

    
12503
  def ExpandNames(self):
12504
    self.gq.ExpandNames(self)
12505

    
12506
  def DeclareLocks(self, level):
12507
    self.gq.DeclareLocks(self, level)
12508

    
12509
  def Exec(self, feedback_fn):
12510
    return self.gq.OldStyleQuery(self)
12511

    
12512

    
12513
class LUGroupSetParams(LogicalUnit):
12514
  """Modifies the parameters of a node group.
12515

12516
  """
12517
  HPATH = "group-modify"
12518
  HTYPE = constants.HTYPE_GROUP
12519
  REQ_BGL = False
12520

    
12521
  def CheckArguments(self):
12522
    all_changes = [
12523
      self.op.ndparams,
12524
      self.op.alloc_policy,
12525
      ]
12526

    
12527
    if all_changes.count(None) == len(all_changes):
12528
      raise errors.OpPrereqError("Please pass at least one modification",
12529
                                 errors.ECODE_INVAL)
12530

    
12531
  def ExpandNames(self):
12532
    # This raises errors.OpPrereqError on its own:
12533
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12534

    
12535
    self.needed_locks = {
12536
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12537
      }
12538

    
12539
  def CheckPrereq(self):
12540
    """Check prerequisites.
12541

12542
    """
12543
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12544

    
12545
    if self.group is None:
12546
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12547
                               (self.op.group_name, self.group_uuid))
12548

    
12549
    if self.op.ndparams:
12550
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12551
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12552
      self.new_ndparams = new_ndparams
12553

    
12554
  def BuildHooksEnv(self):
12555
    """Build hooks env.
12556

12557
    """
12558
    return {
12559
      "GROUP_NAME": self.op.group_name,
12560
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12561
      }
12562

    
12563
  def BuildHooksNodes(self):
12564
    """Build hooks nodes.
12565

12566
    """
12567
    mn = self.cfg.GetMasterNode()
12568
    return ([mn], [mn])
12569

    
12570
  def Exec(self, feedback_fn):
12571
    """Modifies the node group.
12572

12573
    """
12574
    result = []
12575

    
12576
    if self.op.ndparams:
12577
      self.group.ndparams = self.new_ndparams
12578
      result.append(("ndparams", str(self.group.ndparams)))
12579

    
12580
    if self.op.alloc_policy:
12581
      self.group.alloc_policy = self.op.alloc_policy
12582

    
12583
    self.cfg.Update(self.group, feedback_fn)
12584
    return result
12585

    
12586

    
12587
class LUGroupRemove(LogicalUnit):
12588
  HPATH = "group-remove"
12589
  HTYPE = constants.HTYPE_GROUP
12590
  REQ_BGL = False
12591

    
12592
  def ExpandNames(self):
12593
    # This will raise errors.OpPrereqError on its own:
12594
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12595
    self.needed_locks = {
12596
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12597
      }
12598

    
12599
  def CheckPrereq(self):
12600
    """Check prerequisites.
12601

12602
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12605

12606
    """
12607
    # Verify that the group is empty.
12608
    group_nodes = [node.name
12609
                   for node in self.cfg.GetAllNodesInfo().values()
12610
                   if node.group == self.group_uuid]
12611

    
12612
    if group_nodes:
12613
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12614
                                 " nodes: %s" %
12615
                                 (self.op.group_name,
12616
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12617
                                 errors.ECODE_STATE)
12618

    
12619
    # Verify the cluster would not be left group-less.
12620
    if len(self.cfg.GetNodeGroupList()) == 1:
12621
      raise errors.OpPrereqError("Group '%s' is the only group,"
12622
                                 " cannot be removed" %
12623
                                 self.op.group_name,
12624
                                 errors.ECODE_STATE)
12625

    
12626
  def BuildHooksEnv(self):
12627
    """Build hooks env.
12628

12629
    """
12630
    return {
12631
      "GROUP_NAME": self.op.group_name,
12632
      }
12633

    
12634
  def BuildHooksNodes(self):
12635
    """Build hooks nodes.
12636

12637
    """
12638
    mn = self.cfg.GetMasterNode()
12639
    return ([mn], [mn])
12640

    
12641
  def Exec(self, feedback_fn):
12642
    """Remove the node group.
12643

12644
    """
12645
    try:
12646
      self.cfg.RemoveNodeGroup(self.group_uuid)
12647
    except errors.ConfigurationError:
12648
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12649
                               (self.op.group_name, self.group_uuid))
12650

    
12651
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12652

    
12653

    
12654
class LUGroupRename(LogicalUnit):
12655
  HPATH = "group-rename"
12656
  HTYPE = constants.HTYPE_GROUP
12657
  REQ_BGL = False
12658

    
12659
  def ExpandNames(self):
12660
    # This raises errors.OpPrereqError on its own:
12661
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12662

    
12663
    self.needed_locks = {
12664
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12665
      }
12666

    
12667
  def CheckPrereq(self):
12668
    """Check prerequisites.
12669

12670
    Ensures requested new name is not yet used.
12671

12672
    """
12673
    try:
12674
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12675
    except errors.OpPrereqError:
12676
      pass
12677
    else:
12678
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12679
                                 " node group (UUID: %s)" %
12680
                                 (self.op.new_name, new_name_uuid),
12681
                                 errors.ECODE_EXISTS)
12682

    
12683
  def BuildHooksEnv(self):
12684
    """Build hooks env.
12685

12686
    """
12687
    return {
12688
      "OLD_NAME": self.op.group_name,
12689
      "NEW_NAME": self.op.new_name,
12690
      }
12691

    
12692
  def BuildHooksNodes(self):
12693
    """Build hooks nodes.
12694

12695
    """
12696
    mn = self.cfg.GetMasterNode()
12697

    
12698
    all_nodes = self.cfg.GetAllNodesInfo()
12699
    all_nodes.pop(mn, None)
12700

    
12701
    run_nodes = [mn]
12702
    run_nodes.extend(node.name for node in all_nodes.values()
12703
                     if node.group == self.group_uuid)
12704

    
12705
    return (run_nodes, run_nodes)
12706

    
12707
  def Exec(self, feedback_fn):
12708
    """Rename the node group.
12709

12710
    """
12711
    group = self.cfg.GetNodeGroup(self.group_uuid)
12712

    
12713
    if group is None:
12714
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12715
                               (self.op.group_name, self.group_uuid))
12716

    
12717
    group.name = self.op.new_name
12718
    self.cfg.Update(group, feedback_fn)
12719

    
12720
    return self.op.new_name
12721

    
12722

    
12723
class LUGroupEvacuate(LogicalUnit):
12724
  HPATH = "group-evacuate"
12725
  HTYPE = constants.HTYPE_GROUP
12726
  REQ_BGL = False
12727

    
12728
  def ExpandNames(self):
12729
    # This raises errors.OpPrereqError on its own:
12730
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12731

    
12732
    if self.op.target_groups:
12733
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12734
                                  self.op.target_groups)
12735
    else:
12736
      self.req_target_uuids = []
12737

    
12738
    if self.group_uuid in self.req_target_uuids:
12739
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12740
                                 " as a target group (targets are %s)" %
12741
                                 (self.group_uuid,
12742
                                  utils.CommaJoin(self.req_target_uuids)),
12743
                                 errors.ECODE_INVAL)
12744

    
12745
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12746

    
12747
    self.share_locks = _ShareAll()
12748
    self.needed_locks = {
12749
      locking.LEVEL_INSTANCE: [],
12750
      locking.LEVEL_NODEGROUP: [],
12751
      locking.LEVEL_NODE: [],
12752
      }
12753

    
12754
  def DeclareLocks(self, level):
12755
    if level == locking.LEVEL_INSTANCE:
12756
      assert not self.needed_locks[locking.LEVEL_INSTANCE]
12757

    
12758
      # Lock instances optimistically, needs verification once node and group
12759
      # locks have been acquired
12760
      self.needed_locks[locking.LEVEL_INSTANCE] = \
12761
        self.cfg.GetNodeGroupInstances(self.group_uuid)
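      # (The set of instances in the group can change before the locks are
      # actually acquired; CheckPrereq re-checks it via
      # _CheckNodeGroupInstances once the locks are held.)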
12762

    
12763
    elif level == locking.LEVEL_NODEGROUP:
12764
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12765

    
12766
      if self.req_target_uuids:
12767
        lock_groups = set([self.group_uuid] + self.req_target_uuids)
12768

    
12769
        # Lock all groups used by instances optimistically; this requires going
12770
        # via the node before it's locked, requiring verification later on
12771
        lock_groups.update(group_uuid
12772
                           for instance_name in
12773
                             self.owned_locks(locking.LEVEL_INSTANCE)
12774
                           for group_uuid in
12775
                             self.cfg.GetInstanceNodeGroups(instance_name))
12776
      else:
12777
        # No target groups, need to lock all of them
12778
        lock_groups = locking.ALL_SET
12779

    
12780
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12781

    
12782
    elif level == locking.LEVEL_NODE:
12783
      # This will only lock the nodes in the group to be evacuated which
12784
      # contain actual instances
12785
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12786
      self._LockInstancesNodes()
12787

    
12788
      # Lock all nodes in group to be evacuated and target groups
12789
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12790
      assert self.group_uuid in owned_groups
12791
      member_nodes = [node_name
12792
                      for group in owned_groups
12793
                      for node_name in self.cfg.GetNodeGroup(group).members]
12794
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12795

    
12796
  def CheckPrereq(self):
12797
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
12798
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
12799
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
12800

    
12801
    assert owned_groups.issuperset(self.req_target_uuids)
12802
    assert self.group_uuid in owned_groups
12803

    
12804
    # Check if locked instances are still correct
12805
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)
12806

    
12807
    # Get instance information
12808
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12809

    
12810
    # Check if node groups for locked instances are still correct
12811
    for instance_name in owned_instances:
12812
      inst = self.instances[instance_name]
12813
      assert owned_nodes.issuperset(inst.all_nodes), \
12814
        "Instance %s's nodes changed while we kept the lock" % instance_name
12815

    
12816
      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
12817
                                             owned_groups)
12818

    
12819
      assert self.group_uuid in inst_groups, \
12820
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12821

    
12822
    if self.req_target_uuids:
12823
      # User requested specific target groups
12824
      self.target_uuids = self.req_target_uuids
12825
    else:
12826
      # All groups except the one to be evacuated are potential targets
12827
      self.target_uuids = [group_uuid for group_uuid in owned_groups
12828
                           if group_uuid != self.group_uuid]
12829

    
12830
      if not self.target_uuids:
12831
        raise errors.OpPrereqError("There are no possible target groups",
12832
                                   errors.ECODE_INVAL)
12833

    
12834
  def BuildHooksEnv(self):
12835
    """Build hooks env.
12836

12837
    """
12838
    return {
12839
      "GROUP_NAME": self.op.group_name,
12840
      "TARGET_GROUPS": " ".join(self.target_uuids),
12841
      }
12842

    
12843
  def BuildHooksNodes(self):
12844
    """Build hooks nodes.
12845

12846
    """
12847
    mn = self.cfg.GetMasterNode()
12848

    
12849
    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
12850

    
12851
    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12852

    
12853
    return (run_nodes, run_nodes)
12854

    
12855
  def Exec(self, feedback_fn):
12856
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
12857

    
12858
    assert self.group_uuid not in self.target_uuids
12859

    
12860
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12861
                     instances=instances, target_groups=self.target_uuids)
12862

    
12863
    ial.Run(self.op.iallocator)
12864

    
12865
    if not ial.success:
12866
      raise errors.OpPrereqError("Can't compute group evacuation using"
12867
                                 " iallocator '%s': %s" %
12868
                                 (self.op.iallocator, ial.info),
12869
                                 errors.ECODE_NORES)
12870

    
12871
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12872

    
12873
    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12874
                 len(jobs), self.op.group_name)
12875

    
12876
    return ResultWithJobs(jobs)
12877

    
12878

    
12879
class TagsLU(NoHooksLU): # pylint: disable=W0223
12880
  """Generic tags LU.
12881

12882
  This is an abstract class which is the parent of all the other tags LUs.
12883

12884
  """
12885
  def ExpandNames(self):
12886
    self.group_uuid = None
12887
    self.needed_locks = {}
12888
    if self.op.kind == constants.TAG_NODE:
12889
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12890
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
12891
    elif self.op.kind == constants.TAG_INSTANCE:
12892
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12893
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12894
    elif self.op.kind == constants.TAG_NODEGROUP:
12895
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12896

    
12897
    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12898
    # not possible to acquire the BGL based on opcode parameters)
12899

    
12900
  def CheckPrereq(self):
12901
    """Check prerequisites.
12902

12903
    """
12904
    if self.op.kind == constants.TAG_CLUSTER:
12905
      self.target = self.cfg.GetClusterInfo()
12906
    elif self.op.kind == constants.TAG_NODE:
12907
      self.target = self.cfg.GetNodeInfo(self.op.name)
12908
    elif self.op.kind == constants.TAG_INSTANCE:
12909
      self.target = self.cfg.GetInstanceInfo(self.op.name)
12910
    elif self.op.kind == constants.TAG_NODEGROUP:
12911
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
12912
    else:
12913
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12914
                                 str(self.op.kind), errors.ECODE_INVAL)
12915

    
12916

    
12917
class LUTagsGet(TagsLU):
12918
  """Returns the tags of a given object.
12919

12920
  """
12921
  REQ_BGL = False
12922

    
12923
  def ExpandNames(self):
12924
    TagsLU.ExpandNames(self)
12925

    
12926
    # Share locks as this is only a read operation
12927
    self.share_locks = _ShareAll()
12928

    
12929
  def Exec(self, feedback_fn):
12930
    """Returns the tag list.
12931

12932
    """
12933
    return list(self.target.GetTags())
12934

    
12935

    
12936
class LUTagsSearch(NoHooksLU):
12937
  """Searches the tags for a given pattern.
12938

12939
  """
12940
  REQ_BGL = False
12941

    
12942
  def ExpandNames(self):
12943
    self.needed_locks = {}
12944

    
12945
  def CheckPrereq(self):
12946
    """Check prerequisites.
12947

12948
    This checks the pattern passed for validity by compiling it.
12949

12950
    """
12951
    try:
12952
      self.re = re.compile(self.op.pattern)
12953
    except re.error, err:
12954
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12955
                                 (self.op.pattern, err), errors.ECODE_INVAL)
12956

    
12957
  def Exec(self, feedback_fn):
12958
    """Returns the tag list.
12959

12960
    """
12961
    cfg = self.cfg
12962
    tgts = [("/cluster", cfg.GetClusterInfo())]
12963
    ilist = cfg.GetAllInstancesInfo().values()
12964
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12965
    nlist = cfg.GetAllNodesInfo().values()
12966
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12967
    tgts.extend(("/nodegroup/%s" % n.name, n)
12968
                for n in cfg.GetAllNodeGroupsInfo().values())
12969
    results = []
12970
    for path, target in tgts:
12971
      for tag in target.GetTags():
12972
        if self.re.search(tag):
12973
          results.append((path, tag))
12974
    return results
12975

    
12976

    
12977
class LUTagsSet(TagsLU):
12978
  """Sets a tag on a given object.
12979

12980
  """
12981
  REQ_BGL = False
12982

    
12983
  def CheckPrereq(self):
12984
    """Check prerequisites.
12985

12986
    This checks the type and length of the tag name and value.
12987

12988
    """
12989
    TagsLU.CheckPrereq(self)
12990
    for tag in self.op.tags:
12991
      objects.TaggableObject.ValidateTag(tag)
12992

    
12993
  def Exec(self, feedback_fn):
12994
    """Sets the tag.
12995

12996
    """
12997
    try:
12998
      for tag in self.op.tags:
12999
        self.target.AddTag(tag)
13000
    except errors.TagError, err:
13001
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
13002
    self.cfg.Update(self.target, feedback_fn)
13003

    
13004

    
13005
class LUTagsDel(TagsLU):
13006
  """Delete a list of tags from a given object.
13007

13008
  """
13009
  REQ_BGL = False
13010

    
13011
  def CheckPrereq(self):
13012
    """Check prerequisites.
13013

13014
    This checks that we have the given tag.
13015

13016
    """
13017
    TagsLU.CheckPrereq(self)
13018
    for tag in self.op.tags:
13019
      objects.TaggableObject.ValidateTag(tag)
13020
    del_tags = frozenset(self.op.tags)
13021
    cur_tags = self.target.GetTags()
13022

    
13023
    diff_tags = del_tags - cur_tags
13024
    if diff_tags:
13025
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
13026
      raise errors.OpPrereqError("Tag(s) %s not found" %
13027
                                 (utils.CommaJoin(diff_names), ),
13028
                                 errors.ECODE_NOENT)
13029

    
13030
  def Exec(self, feedback_fn):
13031
    """Remove the tag from the object.
13032

13033
    """
13034
    for tag in self.op.tags:
13035
      self.target.RemoveTag(tag)
13036
    self.cfg.Update(self.target, feedback_fn)
13037

    
13038

    
13039
class LUTestDelay(NoHooksLU):
13040
  """Sleep for a specified amount of time.
13041

13042
  This LU sleeps on the master and/or nodes for a specified amount of
13043
  time.
13044

13045
  """
13046
  REQ_BGL = False
13047

    
13048
  def ExpandNames(self):
13049
    """Expand names and set required locks.
13050

13051
    This expands the node list, if any.
13052

13053
    """
13054
    self.needed_locks = {}
13055
    if self.op.on_nodes:
13056
      # _GetWantedNodes can be used here, but is not always appropriate to use
13057
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
13058
      # more information.
13059
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
13060
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
13061

    
13062
  def _TestDelay(self):
13063
    """Do the actual sleep.
13064

13065
    """
13066
    if self.op.on_master:
13067
      if not utils.TestDelay(self.op.duration):
13068
        raise errors.OpExecError("Error during master delay test")
13069
    if self.op.on_nodes:
13070
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
13071
      for node, node_result in result.items():
13072
        node_result.Raise("Failure during rpc call to node %s" % node)
13073

    
13074
  def Exec(self, feedback_fn):
13075
    """Execute the test delay opcode, with the wanted repetitions.
13076

13077
    """
13078
    if self.op.repeat == 0:
13079
      self._TestDelay()
13080
    else:
13081
      top_value = self.op.repeat - 1
13082
      for i in range(self.op.repeat):
13083
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
13084
        self._TestDelay()
13085

    
13086

    
13087
class LUTestJqueue(NoHooksLU):
13088
  """Utility LU to test some aspects of the job queue.
13089

13090
  """
13091
  REQ_BGL = False
13092

    
13093
  # Must be lower than default timeout for WaitForJobChange to see whether it
13094
  # notices changed jobs
13095
  _CLIENT_CONNECT_TIMEOUT = 20.0
13096
  _CLIENT_CONFIRM_TIMEOUT = 60.0
13097

    
13098
  @classmethod
13099
  def _NotifyUsingSocket(cls, cb, errcls):
13100
    """Opens a Unix socket and waits for another program to connect.
13101

13102
    @type cb: callable
13103
    @param cb: Callback to send socket name to client
13104
    @type errcls: class
13105
    @param errcls: Exception class to use for errors
13106

13107
    """
13108
    # Using a temporary directory as there's no easy way to create temporary
13109
    # sockets without writing a custom loop around tempfile.mktemp and
13110
    # socket.bind
13111
    tmpdir = tempfile.mkdtemp()
13112
    try:
13113
      tmpsock = utils.PathJoin(tmpdir, "sock")
13114

    
13115
      logging.debug("Creating temporary socket at %s", tmpsock)
13116
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
13117
      try:
13118
        sock.bind(tmpsock)
13119
        sock.listen(1)
13120

    
13121
        # Send details to client
13122
        cb(tmpsock)
13123

    
13124
        # Wait for client to connect before continuing
13125
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
13126
        try:
13127
          (conn, _) = sock.accept()
13128
        except socket.error, err:
13129
          raise errcls("Client didn't connect in time (%s)" % err)
13130
      finally:
13131
        sock.close()
13132
    finally:
13133
      # Remove as soon as client is connected
13134
      shutil.rmtree(tmpdir)
13135

    
13136
    # Wait for client to close
13137
    try:
13138
      try:
13139
        # pylint: disable=E1101
13140
        # Instance of '_socketobject' has no ... member
13141
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
13142
        conn.recv(1)
13143
      except socket.error, err:
13144
        raise errcls("Client failed to confirm notification (%s)" % err)
13145
    finally:
13146
      conn.close()
13147

    
13148
  def _SendNotification(self, test, arg, sockname):
13149
    """Sends a notification to the client.
13150

13151
    @type test: string
13152
    @param test: Test name
13153
    @param arg: Test argument (depends on test)
13154
    @type sockname: string
13155
    @param sockname: Socket path
13156

13157
    """
13158
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
13159

    
13160
  def _Notify(self, prereq, test, arg):
13161
    """Notifies the client of a test.
13162

13163
    @type prereq: bool
13164
    @param prereq: Whether this is a prereq-phase test
13165
    @type test: string
13166
    @param test: Test name
13167
    @param arg: Test argument (depends on test)
13168

13169
    """
13170
    if prereq:
13171
      errcls = errors.OpPrereqError
13172
    else:
13173
      errcls = errors.OpExecError
13174

    
13175
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
13176
                                                  test, arg),
13177
                                   errcls)
13178

    
13179
  def CheckArguments(self):
13180
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
13181
    self.expandnames_calls = 0
13182

    
13183
  def ExpandNames(self):
13184
    checkargs_calls = getattr(self, "checkargs_calls", 0)
13185
    if checkargs_calls < 1:
13186
      raise errors.ProgrammerError("CheckArguments was not called")
13187

    
13188
    self.expandnames_calls += 1
13189

    
13190
    if self.op.notify_waitlock:
13191
      self._Notify(True, constants.JQT_EXPANDNAMES, None)
13192

    
13193
    self.LogInfo("Expanding names")
13194

    
13195
    # Get lock on master node (just to get a lock, not for a particular reason)
13196
    self.needed_locks = {
13197
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
13198
      }
13199

    
13200
  def Exec(self, feedback_fn):
13201
    if self.expandnames_calls < 1:
13202
      raise errors.ProgrammerError("ExpandNames was not called")
13203

    
13204
    if self.op.notify_exec:
13205
      self._Notify(False, constants.JQT_EXEC, None)
13206

    
13207
    self.LogInfo("Executing")
13208

    
13209
    if self.op.log_messages:
13210
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
13211
      for idx, msg in enumerate(self.op.log_messages):
13212
        self.LogInfo("Sending log message %s", idx + 1)
13213
        feedback_fn(constants.JQT_MSGPREFIX + msg)
13214
        # Report how many test messages have been sent
13215
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)
13216

    
13217
    if self.op.fail:
13218
      raise errors.OpExecError("Opcode failure was requested")
13219

    
13220
    return True
13221

    
13222

    
13223
class IAllocator(object):
13224
  """IAllocator framework.
13225

13226
  An IAllocator instance has four sets of attributes:
13227
    - cfg that is needed to query the cluster
13228
    - input data (all members of the _KEYS class attribute are required)
13229
    - four buffer attributes (in|out_data|text), that represent the
13230
      input (to the external script) in text and data structure format,
13231
      and the output from it, again in two formats
13232
    - the result variables from the script (success, info, nodes) for
13233
      easy usage
13234

13235
  """
13236
  # pylint: disable=R0902
13237
  # lots of instance attributes
13238

    
13239
  def __init__(self, cfg, rpc_runner, mode, **kwargs):
13240
    self.cfg = cfg
13241
    self.rpc = rpc_runner
13242
    # init buffer variables
13243
    self.in_text = self.out_text = self.in_data = self.out_data = None
13244
    # init all input fields so that pylint is happy
13245
    self.mode = mode
13246
    self.memory = self.disks = self.disk_template = None
13247
    self.os = self.tags = self.nics = self.vcpus = None
13248
    self.hypervisor = None
13249
    self.relocate_from = None
13250
    self.name = None
13251
    self.instances = None
13252
    self.evac_mode = None
13253
    self.target_groups = []
13254
    # computed fields
13255
    self.required_nodes = None
13256
    # init result fields
13257
    self.success = self.info = self.result = None
13258

    
13259
    try:
13260
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
13261
    except KeyError:
13262
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
13263
                                   " IAllocator" % self.mode)
13264

    
13265
    keyset = [n for (n, _) in keydata]
13266

    
13267
    for key in kwargs:
13268
      if key not in keyset:
13269
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
13270
                                     " IAllocator" % key)
13271
      setattr(self, key, kwargs[key])
13272

    
13273
    for key in keyset:
13274
      if key not in kwargs:
13275
        raise errors.ProgrammerError("Missing input parameter '%s' to"
13276
                                     " IAllocator" % key)
13277
    self._BuildInputData(compat.partial(fn, self), keydata)
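    # (Every key declared for the mode in _MODE_DATA must be passed as a
    # keyword argument, and no other keyword arguments are accepted.)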
13278

    
13279
  def _ComputeClusterData(self):
13280
    """Compute the generic allocator input data.
13281

13282
    This is the data that is independent of the actual operation.
13283

13284
    """
13285
    cfg = self.cfg
13286
    cluster_info = cfg.GetClusterInfo()
13287
    # cluster data
13288
    data = {
13289
      "version": constants.IALLOCATOR_VERSION,
13290
      "cluster_name": cfg.GetClusterName(),
13291
      "cluster_tags": list(cluster_info.GetTags()),
13292
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
13293
      # we don't have job IDs
13294
      }
13295
    ninfo = cfg.GetAllNodesInfo()
13296
    iinfo = cfg.GetAllInstancesInfo().values()
13297
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
13298

    
13299
    # node data
13300
    node_list = [n.name for n in ninfo.values() if n.vm_capable]
13301

    
13302
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
13303
      hypervisor_name = self.hypervisor
13304
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
13305
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
13306
    else:
13307
      hypervisor_name = cluster_info.enabled_hypervisors[0]
13308

    
13309
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
13310
                                        hypervisor_name)
13311
    node_iinfo = \
13312
      self.rpc.call_all_instances_info(node_list,
13313
                                       cluster_info.enabled_hypervisors)
13314

    
13315
    data["nodegroups"] = self._ComputeNodeGroupData(cfg)
13316

    
13317
    config_ndata = self._ComputeBasicNodeData(ninfo)
13318
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
13319
                                                 i_list, config_ndata)
13320
    assert len(data["nodes"]) == len(ninfo), \
13321
        "Incomplete node data computed"
13322

    
13323
    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
13324

    
13325
    self.in_data = data
13326

    
13327
  @staticmethod
13328
  def _ComputeNodeGroupData(cfg):
13329
    """Compute node groups data.
13330

13331
    """
13332
    ng = dict((guuid, {
13333
      "name": gdata.name,
13334
      "alloc_policy": gdata.alloc_policy,
13335
      })
13336
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
13337

    
13338
    return ng
13339

    
13340
  @staticmethod
13341
  def _ComputeBasicNodeData(node_cfg):
13342
    """Compute global node data.
13343

13344
    @rtype: dict
13345
    @return: a dict mapping node name to a dict of static node attributes
13346

13347
    """
13348
    # fill in static (config-based) values
13349
    node_results = dict((ninfo.name, {
13350
      "tags": list(ninfo.GetTags()),
13351
      "primary_ip": ninfo.primary_ip,
13352
      "secondary_ip": ninfo.secondary_ip,
13353
      "offline": ninfo.offline,
13354
      "drained": ninfo.drained,
13355
      "master_candidate": ninfo.master_candidate,
13356
      "group": ninfo.group,
13357
      "master_capable": ninfo.master_capable,
13358
      "vm_capable": ninfo.vm_capable,
13359
      })
13360
      for ninfo in node_cfg.values())
13361

    
13362
    return node_results
13363

    
13364
  @staticmethod
13365
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
13366
                              node_results):
13367
    """Compute global node data.
13368

13369
    @param node_results: the basic node structures as filled from the config
13370

13371
    """
13372
    # make a copy of the current dict
13373
    node_results = dict(node_results)
13374
    for nname, nresult in node_data.items():
13375
      assert nname in node_results, "Missing basic data for node %s" % nname
13376
      ninfo = node_cfg[nname]
13377

    
13378
      if not (ninfo.offline or ninfo.drained):
13379
        nresult.Raise("Can't get data for node %s" % nname)
13380
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
13381
                                nname)
13382
        remote_info = nresult.payload
13383

    
13384
        for attr in ["memory_total", "memory_free", "memory_dom0",
13385
                     "vg_size", "vg_free", "cpu_total"]:
13386
          if attr not in remote_info:
13387
            raise errors.OpExecError("Node '%s' didn't return attribute"
13388
                                     " '%s'" % (nname, attr))
13389
          if not isinstance(remote_info[attr], int):
13390
            raise errors.OpExecError("Node '%s' returned invalid value"
13391
                                     " for '%s': %s" %
13392
                                     (nname, attr, remote_info[attr]))
13393
        # compute memory used by primary instances
13394
        i_p_mem = i_p_up_mem = 0
13395
        for iinfo, beinfo in i_list:
13396
          if iinfo.primary_node == nname:
13397
            i_p_mem += beinfo[constants.BE_MEMORY]
13398
            if iinfo.name not in node_iinfo[nname].payload:
13399
              i_used_mem = 0
13400
            else:
13401
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
13402
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
13403
            remote_info["memory_free"] -= max(0, i_mem_diff)
13404

    
13405
            if iinfo.admin_up:
13406
              i_p_up_mem += beinfo[constants.BE_MEMORY]
13407

    
13408
        # compute memory used by instances
13409
        pnr_dyn = {
13410
          "total_memory": remote_info["memory_total"],
13411
          "reserved_memory": remote_info["memory_dom0"],
13412
          "free_memory": remote_info["memory_free"],
13413
          "total_disk": remote_info["vg_size"],
13414
          "free_disk": remote_info["vg_free"],
13415
          "total_cpus": remote_info["cpu_total"],
13416
          "i_pri_memory": i_p_mem,
13417
          "i_pri_up_memory": i_p_up_mem,
13418
          }
13419
        pnr_dyn.update(node_results[nname])
13420
        node_results[nname] = pnr_dyn
13421

    
13422
    return node_results
13423

    
13424
  @staticmethod
13425
  def _ComputeInstanceData(cluster_info, i_list):
13426
    """Compute global instance data.
13427

13428
    """
13429
    instance_data = {}
13430
    for iinfo, beinfo in i_list:
13431
      nic_data = []
13432
      for nic in iinfo.nics:
13433
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
13434
        nic_dict = {
13435
          "mac": nic.mac,
13436
          "ip": nic.ip,
13437
          "mode": filled_params[constants.NIC_MODE],
13438
          "link": filled_params[constants.NIC_LINK],
13439
          }
13440
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
13441
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
13442
        nic_data.append(nic_dict)
13443
      pir = {
13444
        "tags": list(iinfo.GetTags()),
13445
        "admin_up": iinfo.admin_up,
13446
        "vcpus": beinfo[constants.BE_VCPUS],
13447
        "memory": beinfo[constants.BE_MEMORY],
13448
        "os": iinfo.os,
13449
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
13450
        "nics": nic_data,
13451
        "disks": [{constants.IDISK_SIZE: dsk.size,
13452
                   constants.IDISK_MODE: dsk.mode}
13453
                  for dsk in iinfo.disks],
13454
        "disk_template": iinfo.disk_template,
13455
        "hypervisor": iinfo.hypervisor,
13456
        }
13457
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
13458
                                                 pir["disks"])
13459
      instance_data[iinfo.name] = pir
13460

    
13461
    return instance_data
13462

    
13463
  def _AddNewInstance(self):
13464
    """Add new instance data to allocator structure.
13465

13466
    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.
13468

13469
    The checks for the completeness of the opcode must have already been
13470
    done.
13471

13472
    """
13473
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
13474

    
13475
    if self.disk_template in constants.DTS_INT_MIRROR:
13476
      self.required_nodes = 2
13477
    else:
13478
      self.required_nodes = 1
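    # (Internally mirrored templates such as DRBD need a primary and a
    # secondary node; all other templates only need the primary node.)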
13479

    
13480
    request = {
13481
      "name": self.name,
13482
      "disk_template": self.disk_template,
13483
      "tags": self.tags,
13484
      "os": self.os,
13485
      "vcpus": self.vcpus,
13486
      "memory": self.memory,
13487
      "disks": self.disks,
13488
      "disk_space_total": disk_space,
13489
      "nics": self.nics,
13490
      "required_nodes": self.required_nodes,
13491
      "hypervisor": self.hypervisor,
13492
      }
13493

    
13494
    return request
13495

    
13496
  def _AddRelocateInstance(self):
13497
    """Add relocate instance data to allocator structure.
13498

13499
    This, in combination with _ComputeClusterData, will create the
    correct structure needed as input for the allocator.
13501

13502
    The checks for the completeness of the opcode must have already been
13503
    done.
13504

13505
    """
13506
    instance = self.cfg.GetInstanceInfo(self.name)
13507
    if instance is None:
13508
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
13509
                                   " IAllocator" % self.name)
13510

    
13511
    if instance.disk_template not in constants.DTS_MIRRORED:
13512
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
13513
                                 errors.ECODE_INVAL)
13514

    
13515
    if instance.disk_template in constants.DTS_INT_MIRROR and \
13516
        len(instance.secondary_nodes) != 1:
13517
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
13518
                                 errors.ECODE_STATE)
13519

    
13520
    self.required_nodes = 1
13521
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
13522
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
13523

    
13524
    request = {
13525
      "name": self.name,
13526
      "disk_space_total": disk_space,
13527
      "required_nodes": self.required_nodes,
13528
      "relocate_from": self.relocate_from,
13529
      }
13530
    return request
13531

    
13532
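  # A relocation request as built by _AddRelocateInstance above is much
  # smaller, e.g. (illustrative values):
  #   {
  #     "type": constants.IALLOCATOR_MODE_RELOC,
  #     "name": "instance1.example.com",
  #     "disk_space_total": ...,  # from _ComputeDiskSize
  #     "required_nodes": 1,
  #     "relocate_from": ["node2.example.com"],
  #   }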
  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }
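  # The two request types above only carry lists of names, for example
  # (illustrative values; evac_mode must be one of
  # constants.IALLOCATOR_NEVAC_MODES):
  #   node-evacuate: {"instances": ["inst1"], "evac_mode": "all"}
  #   change-group:  {"instances": ["inst1"], "target_groups": ["group2"]}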
  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict
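  # A well-formed allocator reply, once deserialized by _ValidateResult above,
  # is a dict with at least the keys "success", "info" and "result", for
  # example (illustrative):
  #   {"success": True, "info": "allocation successful",
  #    "result": ["node1.example.com", "node2.example.com"]}
  # The shape of "result" depends on the request mode and is verified against
  # the mode's entry in _MODE_DATA.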
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
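  # For example (illustrative data), with node2group == {"node1": "uuid-a",
  # "node2": "uuid-b"} and groups == {"uuid-a": {"name": "default"}}, calling
  # _NodesToGroups(node2group, groups, ["node1", "node2", "node3"]) returns
  # ["default", "uuid-b"]: unknown nodes are skipped and groups missing from
  # the map fall back to their UUID.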

    
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)
  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
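# For example, _GetQueryImplementation(constants.QR_INSTANCE) returns the
# _InstanceQuery class, while an unknown resource name raises OpPrereqError;
# the assertion above guarantees that every query type in
# constants.QR_VIA_OP has an implementation registered here.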