1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable-msg=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
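  # Illustrative sketch (not code from this module): an LU's Exec could hand
  # follow-up work back to the processor roughly like this, where op_a and
  # op_b stand for opcode objects built elsewhere in the LU:
  #
  #   return ResultWithJobs([[op_a], [op_b]], summary="follow-up submitted")
  #
  # mcpu.Processor._ProcessResult would then submit [op_a] and [op_b] as two
  # separate jobs and merge their job IDs into the opcode result; the extra
  # keyword ends up in C{self.other}.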
86

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable-msg=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
137
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensure
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separate is better because:
160

161
      - ExpandNames is left as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possibly
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods need no longer worry about missing parameters.
167

168
    """
169
    pass
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
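  # Illustrative sketch (an assumption, not code from this file): a concrete
  # LU that wants all node locks in shared mode could override ExpandNames as
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
  #     self.share_locks[locking.LEVEL_NODE] = 1
  #
  # which follows the rules spelled out in the docstring above.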
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. No nodes should be returned as an
297
      empty list (and not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged but any LU can define it if it
311
    wants to use the local cluster hook-scripts somehow.
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used to send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the unused-argument and
324
    # could-be-a-function pylint warnings
325
    # pylint: disable-msg=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to just lock some instance's nodes, or
359
    to just lock primaries or secondary nodes, if needed.
360

361
    It should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we've really been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
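  # Illustrative sketch (assumption): a caller typically requests the
  # recalculation in ExpandNames and delegates from DeclareLocks, e.g.
  #
  #   self.needed_locks[locking.LEVEL_NODE] = []
  #   self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #   ...
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes(primary_only=True)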
391

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
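# Illustrative usage (assumption): a read-only LU could combine this helper
# with ALL_SET so that everything is locked in shared mode, e.g.
#
#   self.share_locks = _ShareAll()
#   self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}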
558

    
559

    
560
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561
  """Checks if the owned node groups are still correct for an instance.
562

563
  @type cfg: L{config.ConfigWriter}
564
  @param cfg: The cluster configuration
565
  @type instance_name: string
566
  @param instance_name: Instance name
567
  @type owned_groups: set or frozenset
568
  @param owned_groups: List of currently owned node groups
569

570
  """
571
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
572

    
573
  if not owned_groups.issuperset(inst_groups):
574
    raise errors.OpPrereqError("Instance %s's node groups changed since"
575
                               " locks were acquired, current groups are"
576
                               " are '%s', owning groups '%s'; retry the"
577
                               " operation" %
578
                               (instance_name,
579
                                utils.CommaJoin(inst_groups),
580
                                utils.CommaJoin(owned_groups)),
581
                               errors.ECODE_STATE)
582

    
583
  return inst_groups
584

    
585

    
586
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587
  """Checks if the instances in a node group are still correct.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type group_uuid: string
592
  @param group_uuid: Node group UUID
593
  @type owned_instances: set or frozenset
594
  @param owned_instances: List of currently owned instances
595

596
  """
597
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598
  if owned_instances != wanted_instances:
599
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
600
                               " locks were acquired, wanted '%s', have '%s';"
601
                               " retry the operation" %
602
                               (group_uuid,
603
                                utils.CommaJoin(wanted_instances),
604
                                utils.CommaJoin(owned_instances)),
605
                               errors.ECODE_STATE)
606

    
607
  return wanted_instances
608

    
609

    
610
def _SupportsOob(cfg, node):
611
  """Tells if node supports OOB.
612

613
  @type cfg: L{config.ConfigWriter}
614
  @param cfg: The cluster configuration
615
  @type node: L{objects.Node}
616
  @param node: The node
617
  @return: The OOB script if supported or an empty string otherwise
618

619
  """
620
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
621

    
622

    
623
def _GetWantedNodes(lu, nodes):
624
  """Returns list of checked and expanded node names.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type nodes: list
629
  @param nodes: list of node names or None for all nodes
630
  @rtype: list
631
  @return: the list of nodes, sorted
632
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
633

634
  """
635
  if nodes:
636
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
637

    
638
  return utils.NiceSort(lu.cfg.GetNodeList())
639

    
640

    
641
def _GetWantedInstances(lu, instances):
642
  """Returns list of checked and expanded instance names.
643

644
  @type lu: L{LogicalUnit}
645
  @param lu: the logical unit on whose behalf we execute
646
  @type instances: list
647
  @param instances: list of instance names or None for all instances
648
  @rtype: list
649
  @return: the list of instances, sorted
650
  @raise errors.OpPrereqError: if the instances parameter is wrong type
651
  @raise errors.OpPrereqError: if any of the passed instances is not found
652

653
  """
654
  if instances:
655
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
656
  else:
657
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
658
  return wanted
659

    
660

    
661
def _GetUpdatedParams(old_params, update_dict,
662
                      use_default=True, use_none=False):
663
  """Return the new version of a parameter dictionary.
664

665
  @type old_params: dict
666
  @param old_params: old parameters
667
  @type update_dict: dict
668
  @param update_dict: dict containing new parameter values, or
669
      constants.VALUE_DEFAULT to reset the parameter to its default
670
      value
671
  @type use_default: boolean
672
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673
      values as 'to be deleted' values
674
  @type use_none: boolean
675
  @param use_none: whether to recognise C{None} values as 'to be
676
      deleted' values
677
  @rtype: dict
678
  @return: the new parameter dictionary
679

680
  """
681
  params_copy = copy.deepcopy(old_params)
682
  for key, val in update_dict.iteritems():
683
    if ((use_default and val == constants.VALUE_DEFAULT) or
684
        (use_none and val is None)):
685
      try:
686
        del params_copy[key]
687
      except KeyError:
688
        pass
689
    else:
690
      params_copy[key] = val
691
  return params_copy
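# Worked example (illustrative): with old_params={"a": 1, "b": 2} and
# update_dict={"a": constants.VALUE_DEFAULT, "c": 3}, the default flags
# (use_default=True, use_none=False) yield {"b": 2, "c": 3}: "a" is dropped
# so it reverts to its default value and "c" is added.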
692

    
693

    
694
def _ReleaseLocks(lu, level, names=None, keep=None):
695
  """Releases locks owned by an LU.
696

697
  @type lu: L{LogicalUnit}
  @param lu: the logical unit which owns the locks
698
  @type level: member of ganeti.locking.LEVELS
  @param level: Lock level
699
  @type names: list or None
700
  @param names: Names of locks to release
701
  @type keep: list or None
702
  @param keep: Names of locks to retain
703

704
  """
705
  assert not (keep is not None and names is not None), \
706
         "Only one of the 'names' and the 'keep' parameters can be given"
707

    
708
  if names is not None:
709
    should_release = names.__contains__
710
  elif keep:
711
    should_release = lambda name: name not in keep
712
  else:
713
    should_release = None
714

    
715
  if should_release:
716
    retain = []
717
    release = []
718

    
719
    # Determine which locks to release
720
    for name in lu.owned_locks(level):
721
      if should_release(name):
722
        release.append(name)
723
      else:
724
        retain.append(name)
725

    
726
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
727

    
728
    # Release just some locks
729
    lu.glm.release(level, names=release)
730

    
731
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
732
  else:
733
    # Release everything
734
    lu.glm.release(level)
735

    
736
    assert not lu.glm.is_owned(level), "No locks should be owned"
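# Illustrative usage (assumption; node_name is a made-up opcode slot): after
# narrowing work down to a single node an LU could call either
#
#   _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.node_name])
#   _ReleaseLocks(self, locking.LEVEL_NODE)  # drop every lock at this level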
737

    
738

    
739
def _MapInstanceDisksToNodes(instances):
740
  """Creates a map from (node, volume) to instance name.
741

742
  @type instances: list of L{objects.Instance}
743
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
744

745
  """
746
  return dict(((node, vol), inst.name)
747
              for inst in instances
748
              for (node, vols) in inst.MapLVsByNode().items()
749
              for vol in vols)
750

    
751

    
752
def _RunPostHook(lu, node_name):
753
  """Runs the post-hook for an opcode on a single node.
754

755
  """
756
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
757
  try:
758
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
759
  except:
760
    # pylint: disable-msg=W0702
761
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
762

    
763

    
764
def _CheckOutputFields(static, dynamic, selected):
765
  """Checks whether all selected fields are valid.
766

767
  @type static: L{utils.FieldSet}
768
  @param static: static fields set
769
  @type dynamic: L{utils.FieldSet}
770
  @param dynamic: dynamic fields set
771

772
  """
773
  f = utils.FieldSet()
774
  f.Extend(static)
775
  f.Extend(dynamic)
776

    
777
  delta = f.NonMatching(selected)
778
  if delta:
779
    raise errors.OpPrereqError("Unknown output fields selected: %s"
780
                               % ",".join(delta), errors.ECODE_INVAL)
781

    
782

    
783
def _CheckGlobalHvParams(params):
784
  """Validates that given hypervisor params are not global ones.
785

786
  This will ensure that instances don't get customised versions of
787
  global params.
788

789
  """
790
  used_globals = constants.HVC_GLOBALS.intersection(params)
791
  if used_globals:
792
    msg = ("The following hypervisor parameters are global and cannot"
793
           " be customized at instance level, please modify them at"
794
           " cluster level: %s" % utils.CommaJoin(used_globals))
795
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
796

    
797

    
798
def _CheckNodeOnline(lu, node, msg=None):
799
  """Ensure that a given node is online.
800

801
  @param lu: the LU on behalf of which we make the check
802
  @param node: the node to check
803
  @param msg: if passed, should be a message to replace the default one
804
  @raise errors.OpPrereqError: if the node is offline
805

806
  """
807
  if msg is None:
808
    msg = "Can't use offline node"
809
  if lu.cfg.GetNodeInfo(node).offline:
810
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
811

    
812

    
813
def _CheckNodeNotDrained(lu, node):
814
  """Ensure that a given node is not drained.
815

816
  @param lu: the LU on behalf of which we make the check
817
  @param node: the node to check
818
  @raise errors.OpPrereqError: if the node is drained
819

820
  """
821
  if lu.cfg.GetNodeInfo(node).drained:
822
    raise errors.OpPrereqError("Can't use drained node %s" % node,
823
                               errors.ECODE_STATE)
824

    
825

    
826
def _CheckNodeVmCapable(lu, node):
827
  """Ensure that a given node is vm capable.
828

829
  @param lu: the LU on behalf of which we make the check
830
  @param node: the node to check
831
  @raise errors.OpPrereqError: if the node is not vm capable
832

833
  """
834
  if not lu.cfg.GetNodeInfo(node).vm_capable:
835
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
836
                               errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeHasOS(lu, node, os_name, force_variant):
840
  """Ensure that a node supports a given OS.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @param os_name: the OS to query about
845
  @param force_variant: whether to ignore variant errors
846
  @raise errors.OpPrereqError: if the node is not supporting the OS
847

848
  """
849
  result = lu.rpc.call_os_get(node, os_name)
850
  result.Raise("OS '%s' not in supported OS list for node %s" %
851
               (os_name, node),
852
               prereq=True, ecode=errors.ECODE_INVAL)
853
  if not force_variant:
854
    _CheckOSVariant(result.payload, os_name)
855

    
856

    
857
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858
  """Ensure that a node has the given secondary ip.
859

860
  @type lu: L{LogicalUnit}
861
  @param lu: the LU on behalf of which we make the check
862
  @type node: string
863
  @param node: the node to check
864
  @type secondary_ip: string
865
  @param secondary_ip: the ip to check
866
  @type prereq: boolean
867
  @param prereq: whether to throw a prerequisite or an execute error
868
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
870

871
  """
872
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873
  result.Raise("Failure checking secondary ip on node %s" % node,
874
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
875
  if not result.payload:
876
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877
           " please fix and re-run this command" % secondary_ip)
878
    if prereq:
879
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
880
    else:
881
      raise errors.OpExecError(msg)
882

    
883

    
884
def _GetClusterDomainSecret():
885
  """Reads the cluster domain secret.
886

887
  """
888
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
889
                               strict=True)
890

    
891

    
892
def _CheckInstanceDown(lu, instance, reason):
893
  """Ensure that an instance is not running."""
894
  if instance.admin_up:
895
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896
                               (instance.name, reason), errors.ECODE_STATE)
897

    
898
  pnode = instance.primary_node
899
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
901
              prereq=True, ecode=errors.ECODE_ENVIRON)
902

    
903
  if instance.name in ins_l.payload:
904
    raise errors.OpPrereqError("Instance %s is running, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907

    
908
def _ExpandItemName(fn, name, kind):
909
  """Expand an item name.
910

911
  @param fn: the function to use for expansion
912
  @param name: requested item name
913
  @param kind: text description ('Node' or 'Instance')
914
  @return: the resolved (full) name
915
  @raise errors.OpPrereqError: if the item is not found
916

917
  """
918
  full_name = fn(name)
919
  if full_name is None:
920
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
921
                               errors.ECODE_NOENT)
922
  return full_name
923

    
924

    
925
def _ExpandNodeName(cfg, name):
926
  """Wrapper over L{_ExpandItemName} for nodes."""
927
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
928

    
929

    
930
def _ExpandInstanceName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for instance."""
932
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
933

    
934

    
935
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936
                          memory, vcpus, nics, disk_template, disks,
937
                          bep, hvp, hypervisor_name, tags):
938
  """Builds instance related env variables for hooks
939

940
  This builds the hook environment from individual variables.
941

942
  @type name: string
943
  @param name: the name of the instance
944
  @type primary_node: string
945
  @param primary_node: the name of the instance's primary node
946
  @type secondary_nodes: list
947
  @param secondary_nodes: list of secondary nodes as strings
948
  @type os_type: string
949
  @param os_type: the name of the instance's OS
950
  @type status: boolean
951
  @param status: the should_run status of the instance
952
  @type memory: string
953
  @param memory: the memory size of the instance
954
  @type vcpus: string
955
  @param vcpus: the count of VCPUs the instance has
956
  @type nics: list
957
  @param nics: list of tuples (ip, mac, mode, link) representing
958
      the NICs the instance has
959
  @type disk_template: string
960
  @param disk_template: the disk template of the instance
961
  @type disks: list
962
  @param disks: the list of (size, mode) pairs
963
  @type bep: dict
964
  @param bep: the backend parameters for the instance
965
  @type hvp: dict
966
  @param hvp: the hypervisor parameters for the instance
967
  @type hypervisor_name: string
968
  @param hypervisor_name: the hypervisor for the instance
969
  @type tags: list
970
  @param tags: list of instance tags as strings
971
  @rtype: dict
972
  @return: the hook environment for this instance
973

974
  """
975
  if status:
976
    str_status = "up"
977
  else:
978
    str_status = "down"
979
  env = {
980
    "OP_TARGET": name,
981
    "INSTANCE_NAME": name,
982
    "INSTANCE_PRIMARY": primary_node,
983
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984
    "INSTANCE_OS_TYPE": os_type,
985
    "INSTANCE_STATUS": str_status,
986
    "INSTANCE_MEMORY": memory,
987
    "INSTANCE_VCPUS": vcpus,
988
    "INSTANCE_DISK_TEMPLATE": disk_template,
989
    "INSTANCE_HYPERVISOR": hypervisor_name,
990
  }
991

    
992
  if nics:
993
    nic_count = len(nics)
994
    for idx, (ip, mac, mode, link) in enumerate(nics):
995
      if ip is None:
996
        ip = ""
997
      env["INSTANCE_NIC%d_IP" % idx] = ip
998
      env["INSTANCE_NIC%d_MAC" % idx] = mac
999
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1000
      env["INSTANCE_NIC%d_LINK" % idx] = link
1001
      if mode == constants.NIC_MODE_BRIDGED:
1002
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003
  else:
1004
    nic_count = 0
1005

    
1006
  env["INSTANCE_NIC_COUNT"] = nic_count
1007

    
1008
  if disks:
1009
    disk_count = len(disks)
1010
    for idx, (size, mode) in enumerate(disks):
1011
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1012
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1013
  else:
1014
    disk_count = 0
1015

    
1016
  env["INSTANCE_DISK_COUNT"] = disk_count
1017

    
1018
  if not tags:
1019
    tags = []
1020

    
1021
  env["INSTANCE_TAGS"] = " ".join(tags)
1022

    
1023
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024
    for key, value in source.items():
1025
      env["INSTANCE_%s_%s" % (kind, key)] = value
1026

    
1027
  return env
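# Illustrative example (all values invented): for a one-NIC, one-disk instance
# the returned environment contains entries such as
#
#   INSTANCE_NAME=inst1.example.com    INSTANCE_PRIMARY=node1.example.com
#   INSTANCE_NIC_COUNT=1               INSTANCE_NIC0_MODE=bridged
#   INSTANCE_DISK_COUNT=1              INSTANCE_DISK0_SIZE=10240
#
# The hooks runner later prefixes every key with "GANETI_".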
1028

    
1029

    
1030
def _NICListToTuple(lu, nics):
1031
  """Build a list of nic information tuples.
1032

1033
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034
  value in LUInstanceQueryData.
1035

1036
  @type lu:  L{LogicalUnit}
1037
  @param lu: the logical unit on whose behalf we execute
1038
  @type nics: list of L{objects.NIC}
1039
  @param nics: list of nics to convert to hooks tuples
1040

1041
  """
1042
  hooks_nics = []
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  for nic in nics:
1045
    ip = nic.ip
1046
    mac = nic.mac
1047
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048
    mode = filled_params[constants.NIC_MODE]
1049
    link = filled_params[constants.NIC_LINK]
1050
    hooks_nics.append((ip, mac, mode, link))
1051
  return hooks_nics
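# Illustrative note: each returned tuple has the shape (ip, mac, mode, link),
# for example ("198.51.100.10", "aa:00:00:11:22:33",
# constants.NIC_MODE_BRIDGED, "xen-br0"); the concrete values here are
# invented.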
1052

    
1053

    
1054
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055
  """Builds instance related env variables for hooks from an object.
1056

1057
  @type lu: L{LogicalUnit}
1058
  @param lu: the logical unit on whose behalf we execute
1059
  @type instance: L{objects.Instance}
1060
  @param instance: the instance for which we should build the
1061
      environment
1062
  @type override: dict
1063
  @param override: dictionary with key/values that will override
1064
      our values
1065
  @rtype: dict
1066
  @return: the hook environment dictionary
1067

1068
  """
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  bep = cluster.FillBE(instance)
1071
  hvp = cluster.FillHV(instance)
1072
  args = {
1073
    "name": instance.name,
1074
    "primary_node": instance.primary_node,
1075
    "secondary_nodes": instance.secondary_nodes,
1076
    "os_type": instance.os,
1077
    "status": instance.admin_up,
1078
    "memory": bep[constants.BE_MEMORY],
1079
    "vcpus": bep[constants.BE_VCPUS],
1080
    "nics": _NICListToTuple(lu, instance.nics),
1081
    "disk_template": instance.disk_template,
1082
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083
    "bep": bep,
1084
    "hvp": hvp,
1085
    "hypervisor_name": instance.hypervisor,
1086
    "tags": instance.tags,
1087
  }
1088
  if override:
1089
    args.update(override)
1090
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1091

    
1092

    
1093
def _AdjustCandidatePool(lu, exceptions):
1094
  """Adjust the candidate pool after node operations.
1095

1096
  """
1097
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098
  if mod_list:
1099
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1100
               utils.CommaJoin(node.name for node in mod_list))
1101
    for name in mod_list:
1102
      lu.context.ReaddNode(name)
1103
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104
  if mc_now > mc_max:
1105
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106
               (mc_now, mc_max))
1107

    
1108

    
1109
def _DecideSelfPromotion(lu, exceptions=None):
1110
  """Decide whether I should promote myself as a master candidate.
1111

1112
  """
1113
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115
  # the new node will increase mc_max with one, so:
1116
  mc_should = min(mc_should + 1, cp_size)
1117
  return mc_now < mc_should
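# Worked example (illustrative numbers): with candidate_pool_size=10,
# mc_now=3 and mc_should=3, adding the new node gives
# mc_should = min(3 + 1, 10) = 4, so 3 < 4 and the node promotes itself.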
1118

    
1119

    
1120
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121
  """Check that the brigdes needed by a list of nics exist.
1122

1123
  """
1124
  cluster = lu.cfg.GetClusterInfo()
1125
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126
  brlist = [params[constants.NIC_LINK] for params in paramslist
1127
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128
  if brlist:
1129
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1130
    result.Raise("Error checking bridges on destination node '%s'" %
1131
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132

    
1133

    
1134
def _CheckInstanceBridgesExist(lu, instance, node=None):
1135
  """Check that the brigdes needed by an instance exist.
1136

1137
  """
1138
  if node is None:
1139
    node = instance.primary_node
1140
  _CheckNicsBridgesExist(lu, instance.nics, node)
1141

    
1142

    
1143
def _CheckOSVariant(os_obj, name):
1144
  """Check whether an OS name conforms to the os variants specification.
1145

1146
  @type os_obj: L{objects.OS}
1147
  @param os_obj: OS object to check
1148
  @type name: string
1149
  @param name: OS name passed by the user, to check for validity
1150

1151
  """
1152
  variant = objects.OS.GetVariant(name)
1153
  if not os_obj.supported_variants:
1154
    if variant:
1155
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156
                                 " passed)" % (os_obj.name, variant),
1157
                                 errors.ECODE_INVAL)
1158
    return
1159
  if not variant:
1160
    raise errors.OpPrereqError("OS name must include a variant",
1161
                               errors.ECODE_INVAL)
1162

    
1163
  if variant not in os_obj.supported_variants:
1164
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165

    
1166

    
1167
def _GetNodeInstancesInner(cfg, fn):
1168
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169

    
1170

    
1171
def _GetNodeInstances(cfg, node_name):
1172
  """Returns a list of all primary and secondary instances on a node.
1173

1174
  """
1175

    
1176
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177

    
1178

    
1179
def _GetNodePrimaryInstances(cfg, node_name):
1180
  """Returns primary instances on a node.
1181

1182
  """
1183
  return _GetNodeInstancesInner(cfg,
1184
                                lambda inst: node_name == inst.primary_node)
1185

    
1186

    
1187
def _GetNodeSecondaryInstances(cfg, node_name):
1188
  """Returns secondary instances on a node.
1189

1190
  """
1191
  return _GetNodeInstancesInner(cfg,
1192
                                lambda inst: node_name in inst.secondary_nodes)
1193

    
1194

    
1195
def _GetStorageTypeArgs(cfg, storage_type):
1196
  """Returns the arguments for a storage type.
1197

1198
  """
1199
  # Special case for file storage
1200
  if storage_type == constants.ST_FILE:
1201
    # storage.FileStorage wants a list of storage directories
1202
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203

    
1204
  return []
1205

    
1206

    
1207
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208
  faulty = []
1209

    
1210
  for dev in instance.disks:
1211
    cfg.SetDiskID(dev, node_name)
1212

    
1213
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214
  result.Raise("Failed to get disk status from node %s" % node_name,
1215
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216

    
1217
  for idx, bdev_status in enumerate(result.payload):
1218
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219
      faulty.append(idx)
1220

    
1221
  return faulty
1222

    
1223

    
1224
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225
  """Check the sanity of iallocator and node arguments and use the
1226
  cluster-wide iallocator if appropriate.
1227

1228
  Check that at most one of (iallocator, node) is specified. If none is
1229
  specified, then the LU's opcode's iallocator slot is filled with the
1230
  cluster-wide default iallocator.
1231

1232
  @type iallocator_slot: string
1233
  @param iallocator_slot: the name of the opcode iallocator slot
1234
  @type node_slot: string
1235
  @param node_slot: the name of the opcode target node slot
1236

1237
  """
1238
  node = getattr(lu.op, node_slot, None)
1239
  iallocator = getattr(lu.op, iallocator_slot, None)
1240

    
1241
  if node is not None and iallocator is not None:
1242
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243
                               errors.ECODE_INVAL)
1244
  elif node is None and iallocator is None:
1245
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1246
    if default_iallocator:
1247
      setattr(lu.op, iallocator_slot, default_iallocator)
1248
    else:
1249
      raise errors.OpPrereqError("No iallocator or node given and no"
1250
                                 " cluster-wide default iallocator found;"
1251
                                 " please specify either an iallocator or a"
1252
                                 " node, or set a cluster-wide default"
1253
                                 " iallocator")
1254

    
1255

    
1256
def _GetDefaultIAllocator(cfg, iallocator):
1257
  """Decides on which iallocator to use.
1258

1259
  @type cfg: L{config.ConfigWriter}
1260
  @param cfg: Cluster configuration object
1261
  @type iallocator: string or None
1262
  @param iallocator: Iallocator specified in opcode
1263
  @rtype: string
1264
  @return: Iallocator name
1265

1266
  """
1267
  if not iallocator:
1268
    # Use default iallocator
1269
    iallocator = cfg.GetDefaultIAllocator()
1270

    
1271
  if not iallocator:
1272
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273
                               " opcode nor as a cluster-wide default",
1274
                               errors.ECODE_INVAL)
1275

    
1276
  return iallocator
1277

    
1278

    
1279
class LUClusterPostInit(LogicalUnit):
1280
  """Logical unit for running hooks after cluster initialization.
1281

1282
  """
1283
  HPATH = "cluster-init"
1284
  HTYPE = constants.HTYPE_CLUSTER
1285

    
1286
  def BuildHooksEnv(self):
1287
    """Build hooks env.
1288

1289
    """
1290
    return {
1291
      "OP_TARGET": self.cfg.GetClusterName(),
1292
      }
1293

    
1294
  def BuildHooksNodes(self):
1295
    """Build hooks nodes.
1296

1297
    """
1298
    return ([], [self.cfg.GetMasterNode()])
1299

    
1300
  def Exec(self, feedback_fn):
1301
    """Nothing to do.
1302

1303
    """
1304
    return True
1305

    
1306

    
1307
class LUClusterDestroy(LogicalUnit):
1308
  """Logical unit for destroying the cluster.
1309

1310
  """
1311
  HPATH = "cluster-destroy"
1312
  HTYPE = constants.HTYPE_CLUSTER
1313

    
1314
  def BuildHooksEnv(self):
1315
    """Build hooks env.
1316

1317
    """
1318
    return {
1319
      "OP_TARGET": self.cfg.GetClusterName(),
1320
      }
1321

    
1322
  def BuildHooksNodes(self):
1323
    """Build hooks nodes.
1324

1325
    """
1326
    return ([], [])
1327

    
1328
  def CheckPrereq(self):
1329
    """Check prerequisites.
1330

1331
    This checks whether the cluster is empty.
1332

1333
    Any errors are signaled by raising errors.OpPrereqError.
1334

1335
    """
1336
    master = self.cfg.GetMasterNode()
1337

    
1338
    nodelist = self.cfg.GetNodeList()
1339
    if len(nodelist) != 1 or nodelist[0] != master:
1340
      raise errors.OpPrereqError("There are still %d node(s) in"
1341
                                 " this cluster." % (len(nodelist) - 1),
1342
                                 errors.ECODE_INVAL)
1343
    instancelist = self.cfg.GetInstanceList()
1344
    if instancelist:
1345
      raise errors.OpPrereqError("There are still %d instance(s) in"
1346
                                 " this cluster." % len(instancelist),
1347
                                 errors.ECODE_INVAL)
1348

    
1349
  def Exec(self, feedback_fn):
1350
    """Destroys the cluster.
1351

1352
    """
1353
    master = self.cfg.GetMasterNode()
1354

    
1355
    # Run post hooks on master node before it's removed
1356
    _RunPostHook(self, master)
1357

    
1358
    result = self.rpc.call_node_stop_master(master, False)
1359
    result.Raise("Could not disable the master role")
1360

    
1361
    return master
1362

    
1363

    
1364
def _VerifyCertificate(filename):
1365
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1366

1367
  @type filename: string
1368
  @param filename: Path to PEM file
1369

1370
  """
1371
  try:
1372
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373
                                           utils.ReadFile(filename))
1374
  except Exception, err: # pylint: disable-msg=W0703
1375
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1376
            "Failed to load X509 certificate %s: %s" % (filename, err))
1377

    
1378
  (errcode, msg) = \
1379
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380
                                constants.SSL_CERT_EXPIRATION_ERROR)
1381

    
1382
  if msg:
1383
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1384
  else:
1385
    fnamemsg = None
1386

    
1387
  if errcode is None:
1388
    return (None, fnamemsg)
1389
  elif errcode == utils.CERT_WARNING:
1390
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391
  elif errcode == utils.CERT_ERROR:
1392
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393

    
1394
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
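# Illustrative note: the function returns (None, None) for a certificate with
# nothing to report, (LUClusterVerifyConfig.ETYPE_WARNING, msg) when expiry
# is near and (LUClusterVerifyConfig.ETYPE_ERROR, msg) for unloadable or
# expired certificates; LUClusterVerifyConfig.Exec feeds the tuple straight
# into _ErrorIf.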
1395

    
1396

    
1397
def _GetAllHypervisorParameters(cluster, instances):
1398
  """Compute the set of all hypervisor parameters.
1399

1400
  @type cluster: L{objects.Cluster}
1401
  @param cluster: the cluster object
1402
  @param instances: list of L{objects.Instance}
1403
  @param instances: additional instances from which to obtain parameters
1404
  @rtype: list of (origin, hypervisor, parameters)
1405
  @return: a list with all parameters found, indicating the hypervisor they
1406
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407

1408
  """
1409
  hvp_data = []
1410

    
1411
  for hv_name in cluster.enabled_hypervisors:
1412
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413

    
1414
  for os_name, os_hvp in cluster.os_hvp.items():
1415
    for hv_name, hv_params in os_hvp.items():
1416
      if hv_params:
1417
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419

    
1420
  # TODO: collapse identical parameter values in a single one
1421
  for instance in instances:
1422
    if instance.hvparams:
1423
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424
                       cluster.FillHV(instance)))
1425

    
1426
  return hvp_data
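# Illustrative example (names invented): the returned list has entries like
#
#   [("cluster", "xen-pvm", {...}),
#    ("os debian-installer", "xen-pvm", {...}),
#    ("instance inst1.example.com", "xen-pvm", {...})]
#
# which LUClusterVerifyConfig._VerifyHVP then checks purely syntactically.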
1427

    
1428

    
1429
class _VerifyErrors(object):
1430
  """Mix-in for cluster/group verify LUs.
1431

1432
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433
  self.op and self._feedback_fn to be available.)
1434

1435
  """
1436
  TCLUSTER = "cluster"
1437
  TNODE = "node"
1438
  TINSTANCE = "instance"
1439

    
1440
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452
  ENODEDRBD = (TNODE, "ENODEDRBD")
1453
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456
  ENODEHV = (TNODE, "ENODEHV")
1457
  ENODELVM = (TNODE, "ENODELVM")
1458
  ENODEN1 = (TNODE, "ENODEN1")
1459
  ENODENET = (TNODE, "ENODENET")
1460
  ENODEOS = (TNODE, "ENODEOS")
1461
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463
  ENODERPC = (TNODE, "ENODERPC")
1464
  ENODESSH = (TNODE, "ENODESSH")
1465
  ENODEVERSION = (TNODE, "ENODEVERSION")
1466
  ENODESETUP = (TNODE, "ENODESETUP")
1467
  ENODETIME = (TNODE, "ENODETIME")
1468
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469

    
1470
  ETYPE_FIELD = "code"
1471
  ETYPE_ERROR = "ERROR"
1472
  ETYPE_WARNING = "WARNING"
1473

    
1474
  def _Error(self, ecode, item, msg, *args, **kwargs):
1475
    """Format an error message.
1476

1477
    Based on the opcode's error_codes parameter, either format a
1478
    parseable error code, or a simpler error string.
1479

1480
    This must be called only from Exec and functions called from Exec.
1481

1482
    """
1483
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484
    itype, etxt = ecode
1485
    # first complete the msg
1486
    if args:
1487
      msg = msg % args
1488
    # then format the whole message
1489
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1490
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491
    else:
1492
      if item:
1493
        item = " " + item
1494
      else:
1495
        item = ""
1496
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497
    # and finally report it via the feedback_fn
1498
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1499

    
1500
  def _ErrorIf(self, cond, *args, **kwargs):
1501
    """Log an error message if the passed condition is True.
1502

1503
    """
1504
    cond = (bool(cond)
1505
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1506
    if cond:
1507
      self._Error(*args, **kwargs)
1508
    # do not mark the operation as failed for WARN cases only
1509
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510
      self.bad = self.bad or cond
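  # Illustrative example (message text invented): without opcode error_codes
  # a reported line looks like
  #   - ERROR: node node1.example.com: ssh check failed
  # while with error_codes enabled it becomes machine-parseable:
  #   - ERROR:ENODESSH:node:node1.example.com:ssh check failed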
1511

    
1512

    
1513
class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1514
  """Verifies the cluster config.
1515

1516
  """
1517
  REQ_BGL = True
1518

    
1519
  def _VerifyHVP(self, hvp_data):
1520
    """Verifies locally the syntax of the hypervisor parameters.
1521

1522
    """
1523
    for item, hv_name, hv_params in hvp_data:
1524
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1525
             (item, hv_name))
1526
      try:
1527
        hv_class = hypervisor.GetHypervisor(hv_name)
1528
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1529
        hv_class.CheckParameterSyntax(hv_params)
1530
      except errors.GenericError, err:
1531
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1532

    
1533
  def ExpandNames(self):
1534
    # Information can be safely retrieved as the BGL is acquired in exclusive
1535
    # mode
1536
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1537
    self.all_node_info = self.cfg.GetAllNodesInfo()
1538
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
1539
    self.needed_locks = {}
1540

    
1541
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non-"
                  "existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return (not self.bad, [g.name for g in self.all_group_info.values()])


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("Missing node locks for LV check: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

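  # Clock check: since the RPC round-trip time is unknown, a node's reported
  # time is only flagged when it falls outside the window
  # [rpc_start - NODE_MAX_CLOCK_SKEW, rpc_end + NODE_MAX_CLOCK_SKEW].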
  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

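  # N+1 check, informally: for every node N and every primary node P having
  # secondaries on N, the sum of BE_MEMORY over P's auto-balanced instances
  # must fit into N's free memory, otherwise N cannot absorb a failover of
  # P's instances.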
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to host, should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

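  # The last argument of _VerifyFiles is the 4-tuple produced by
  # _ComputeAncillaryFiles (see Exec): files required on all nodes, optional
  # files, master-candidate-only files and vm_capable-only files.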
  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo if not node.offline)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      if node.offline:
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

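  # drbd_map (from ConfigWriter.ComputeDRBDMap) maps each node name to a
  # {minor: instance name} dict, for example (names invented):
  #   {"node1.example.com": {0: "inst1.example.com", 1: "inst2.example.com"}}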
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result == None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
      # ghost instance should not be running, but otherwise we
      # don't give double warnings (both ghost instance and
      # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

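  # Each entry in the NV_OSLIST payload is a 7-element list:
  #   [name, path, status, diagnose, variants, parameters, api_versions]
  # which is what the length check and unpacking below rely on.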
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{NodeImage})
    @param node_image: Node image objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
2787
    for instance in self.my_inst_names:
2788
      if verbose:
2789
        feedback_fn("* Verifying instance %s" % instance)
2790
      inst_config = self.my_inst_info[instance]
2791
      self._VerifyInstance(instance, inst_config, node_image,
2792
                           instdisk[instance])
2793
      inst_nodes_offline = []
2794

    
2795
      pnode = inst_config.primary_node
2796
      pnode_img = node_image[pnode]
2797
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2798
               self.ENODERPC, pnode, "instance %s, connection to"
2799
               " primary node failed", instance)
2800

    
2801
      _ErrorIf(inst_config.admin_up and pnode_img.offline,
2802
               self.EINSTANCEBADNODE, instance,
2803
               "instance is marked as running and lives on offline node %s",
2804
               inst_config.primary_node)
2805

    
2806
      # If the instance is non-redundant we cannot survive losing its primary
2807
      # node, so we are not N+1 compliant. On the other hand we have no disk
2808
      # templates with more than one secondary so that situation is not well
2809
      # supported either.
2810
      # FIXME: does not support file-backed instances
2811
      if not inst_config.secondary_nodes:
2812
        i_non_redundant.append(instance)
2813

    
2814
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2815
               instance, "instance has multiple secondary nodes: %s",
2816
               utils.CommaJoin(inst_config.secondary_nodes),
2817
               code=self.ETYPE_WARNING)
2818

    
2819
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2820
        pnode = inst_config.primary_node
2821
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2822
        instance_groups = {}
2823

    
2824
        for node in instance_nodes:
2825
          instance_groups.setdefault(self.all_node_info[node].group,
2826
                                     []).append(node)
2827

    
2828
        pretty_list = [
2829
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2830
          # Sort so that we always list the primary node first.
2831
          for group, nodes in sorted(instance_groups.items(),
2832
                                     key=lambda (_, nodes): pnode in nodes,
2833
                                     reverse=True)]
2834

    
2835
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2836
                      instance, "instance has primary and secondary nodes in"
2837
                      " different groups: %s", utils.CommaJoin(pretty_list),
2838
                      code=self.ETYPE_WARNING)
2839

    
2840
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2841
        i_non_a_balanced.append(instance)
2842

    
2843
      for snode in inst_config.secondary_nodes:
2844
        s_img = node_image[snode]
2845
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2846
                 "instance %s, connection to secondary node failed", instance)
2847

    
2848
        if s_img.offline:
2849
          inst_nodes_offline.append(snode)
2850

    
2851
      # warn that the instance lives on offline nodes
2852
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2853
               "instance has offline secondary node(s) %s",
2854
               utils.CommaJoin(inst_nodes_offline))
2855
      # ... or ghost/non-vm_capable nodes
2856
      for node in inst_config.all_nodes:
2857
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2858
                 "instance lives on ghost node %s", node)
2859
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2860
                 instance, "instance lives on non-vm_capable node %s", node)
2861

    
2862
    feedback_fn("* Verifying orphan volumes")
2863
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2864

    
2865
    # We will get spurious "unknown volume" warnings if any node of this group
2866
    # is secondary for an instance whose primary is in another group. To avoid
2867
    # them, we find these instances and add their volumes to node_vol_should.
2868
    for inst in self.all_inst_info.values():
2869
      for secondary in inst.secondary_nodes:
2870
        if (secondary in self.my_node_info
2871
            and inst.name not in self.my_inst_info):
2872
          inst.MapLVsByNode(node_vol_should)
2873
          break
2874

    
2875
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2876

    
2877
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2878
      feedback_fn("* Verifying N+1 Memory redundancy")
2879
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2880

    
2881
    feedback_fn("* Other Notes")
2882
    if i_non_redundant:
2883
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2884
                  % len(i_non_redundant))
2885

    
2886
    if i_non_a_balanced:
2887
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2888
                  % len(i_non_a_balanced))
2889

    
2890
    if n_offline:
2891
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2892

    
2893
    if n_drained:
2894
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2895

    
2896
    return not self.bad
2897

    
2898
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = 0

    return lu_result


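# Illustrative sketch (editor's note, not part of the module): each per-node
# result consumed by HooksCallBack above carries a payload of
# (script, status, output) tuples, e.g. a hypothetical post-hook run:
#
#   res.payload = [("50ganeti-check", constants.HKR_SUCCESS, ""),
#                  ("99custom-check", constants.HKR_FAIL, "disk check failed")]
#
# Only entries with constants.HKR_FAIL are reported back to the user.
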
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


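# Illustrative sketch (editor's note, not part of the module): the
# ResultWithJobs returned by LUClusterVerifyDisks.Exec above wraps one
# single-opcode job per node group, e.g. for two hypothetical groups
# "default" and "rack1" the processor would be handed:
#
#   jobs = [[opcodes.OpGroupVerifyDisks(group_name="default")],
#           [opcodes.OpGroupVerifyDisks(group_name="rack1")]]
#
# and each inner list is submitted as a separate job.
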
class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      _CheckInstanceNodeGroups(self.cfg, instance_name, owned_groups)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(key)

    return (res_nodes, list(res_instances), res_missing)


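# Illustrative sketch (editor's note, not part of the module): the three
# collections returned by LUGroupVerifyDisks.Exec above, with hypothetical
# values:
#
#   (res_nodes, res_instances, res_missing) = (
#     {"node2.example.com": "rpc timeout"},          # node -> error message
#     ["instance1.example.com"],                     # need activate-disks
#     {"instance2.example.com":                      # instance -> missing LVs
#      [("node3.example.com", "xenvg/disk0")]},
#     )
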
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


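# Illustrative sketch (editor's note, not part of the module): blockdev_getsize
# reports sizes in bytes while Disk objects store mebibytes, hence the
# "size >> 20" above:
#
#   size_bytes = 10 * 1024 * 1024 * 1024   # hypothetical 10 GiB volume
#   size_mib = size_bytes >> 20            # == 10240, i.e. size_bytes // 2**20
#   assert size_mib == 10240
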
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


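# Illustrative sketch (editor's note, not part of the module): the
# hidden_os/blacklisted_os modification lists handled by helper_os in
# LUClusterSetParams.Exec above are (action, os_name) pairs, e.g. a
# hypothetical opcode field:
#
#   op.hidden_os = [(constants.DDM_ADD, "debian-image"),
#                   (constants.DDM_REMOVE, "lenny-image")]
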
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


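# Illustrative usage sketch (editor's note, hypothetical node names): from
# within an LU, _UploadHelper above pushes one local file to a set of nodes
# and only warns on per-node failures instead of aborting:
#
#   _UploadHelper(self, ["node2.example.com", "node3.example.com"],
#                 constants.ETC_HOSTS)
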
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


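# Illustrative sketch (editor's note, not part of the module): the four sets
# returned by _ComputeAncillaryFiles above group files by their target nodes,
# roughly:
#
#   (files_all, files_all_opt, files_mc, files_vm) = \
#     _ComputeAncillaryFiles(cluster, redist=True)
#   # files_all     -> every node (known_hosts, confd HMAC key, ...)
#   # files_all_opt -> either on all nodes or on none (RAPI users file)
#   # files_mc      -> master candidates only (empty when redist=True)
#   # files_vm      -> VM-capable nodes (hypervisor ancillary files)
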
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))

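# Illustrative sketch (editor's note, not part of the module): a hypothetical
# "health" payload that passes _CheckPayload above; WARNING/CRITICAL items are
# logged by Exec:
#
#   result.payload = [("PSU0", constants.OOB_STATUS_OK),
#                     ("FAN2", constants.OOB_STATUS_WARNING)]
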
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

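  # Illustrative sketch (editor's note, not part of the class): _DiagnoseByOS
  # above turns a per-node RPC result into a per-OS mapping, e.g. for a
  # hypothetical payload entry on "node1":
  #
  #   ("debian-etch", "/usr/lib/os/debian-etch", True, "", [], [], [10])
  #
  # the result is roughly:
  #
  #   {"debian-etch": {"node1": [("/usr/lib/os/debian-etch", True, "",
  #                               [], [], [10])]}}
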
  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

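  # Illustrative sketch (editor's note, not part of the class): for a
  # hypothetical call _BuildFilter(["name"], ["debian-etch"]) the resulting
  # qlang filter is roughly:
  #
  #   [qlang.OP_AND,
  #    [qlang.OP_OR, [qlang.OP_EQUAL, "name", "debian-etch"]],  # name filter
  #    [qlang.OP_AND,
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "hidden"]],
  #     [qlang.OP_NOT, [qlang.OP_TRUE, "blacklisted"]],
  #     [qlang.OP_TRUE, "valid"]]]
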
  def CheckArguments(self):
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
                       self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)


class LUNodeRemove(LogicalUnit):
4202
  """Logical unit for removing a node.
4203

4204
  """
4205
  HPATH = "node-remove"
4206
  HTYPE = constants.HTYPE_NODE
4207

    
4208
  def BuildHooksEnv(self):
4209
    """Build hooks env.
4210

4211
    This doesn't run on the target node in the pre phase as a failed
4212
    node would then be impossible to remove.
4213

4214
    """
4215
    return {
4216
      "OP_TARGET": self.op.node_name,
4217
      "NODE_NAME": self.op.node_name,
4218
      }
4219

    
4220
  def BuildHooksNodes(self):
4221
    """Build hooks nodes.
4222

4223
    """
4224
    all_nodes = self.cfg.GetNodeList()
4225
    try:
4226
      all_nodes.remove(self.op.node_name)
4227
    except ValueError:
4228
      logging.warning("Node '%s', which is about to be removed, was not found"
4229
                      " in the list of all nodes", self.op.node_name)
4230
    return (all_nodes, all_nodes)
4231

    
4232
  def CheckPrereq(self):
4233
    """Check prerequisites.
4234

4235
    This checks:
4236
     - the node exists in the configuration
4237
     - it does not have primary or secondary instances
4238
     - it's not the master
4239

4240
    Any errors are signaled by raising errors.OpPrereqError.
4241

4242
    """
4243
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4244
    node = self.cfg.GetNodeInfo(self.op.node_name)
4245
    assert node is not None
4246

    
4247
    masternode = self.cfg.GetMasterNode()
4248
    if node.name == masternode:
4249
      raise errors.OpPrereqError("Node is the master node, failover to another"
4250
                                 " node is required", errors.ECODE_INVAL)
4251

    
4252
    for instance_name, instance in self.cfg.GetAllInstancesInfo().items():
4253
      if node.name in instance.all_nodes:
4254
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4255
                                   " please remove first" % instance_name,
4256
                                   errors.ECODE_INVAL)
4257
    self.op.node_name = node.name
4258
    self.node = node
4259

    
4260
  def Exec(self, feedback_fn):
4261
    """Removes the node from the cluster.
4262

4263
    """
4264
    node = self.node
4265
    logging.info("Stopping the node daemon and removing configs from node %s",
4266
                 node.name)
4267

    
4268
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4269

    
4270
    # Promote nodes to master candidate as needed
4271
    _AdjustCandidatePool(self, exceptions=[node.name])
4272
    self.context.RemoveNode(node.name)
4273

    
4274
    # Run post hooks on the node before it's removed
4275
    _RunPostHook(self, node.name)
4276

    
4277
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4278
    msg = result.fail_msg
4279
    if msg:
4280
      self.LogWarning("Errors encountered on the remote node while leaving"
4281
                      " the cluster: %s", msg)
4282

    
4283
    # Remove node from our /etc/hosts
4284
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4285
      master_node = self.cfg.GetMasterNode()
4286
      result = self.rpc.call_etc_hosts_modify(master_node,
4287
                                              constants.ETC_HOSTS_REMOVE,
4288
                                              node.name, None)
4289
      result.Raise("Can't update hosts file with new host data")
4290
      _RedistributeAncillaryFiles(self)
4291

    
4292

    
4293
class _NodeQuery(_QueryBase):
4294
  FIELDS = query.NODE_FIELDS
4295

    
4296
  def ExpandNames(self, lu):
4297
    lu.needed_locks = {}
4298
    lu.share_locks[locking.LEVEL_NODE] = 1
4299

    
4300
    if self.names:
4301
      self.wanted = _GetWantedNodes(lu, self.names)
4302
    else:
4303
      self.wanted = locking.ALL_SET
4304

    
4305
    self.do_locking = (self.use_locking and
4306
                       query.NQ_LIVE in self.requested_data)
4307

    
4308
    if self.do_locking:
4309
      # if we don't request only static fields, we need to lock the nodes
4310
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4311

    
4312
  def DeclareLocks(self, lu, level):
4313
    pass
4314

    
4315
  def _GetQueryData(self, lu):
4316
    """Computes the list of nodes and their attributes.
4317

4318
    """
4319
    all_info = lu.cfg.GetAllNodesInfo()
4320

    
4321
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4322

    
4323
    # Gather data as requested
4324
    if query.NQ_LIVE in self.requested_data:
4325
      # filter out non-vm_capable nodes
4326
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4327

    
4328
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4329
                                        lu.cfg.GetHypervisorType())
4330
      live_data = dict((name, nresult.payload)
4331
                       for (name, nresult) in node_data.items()
4332
                       if not nresult.fail_msg and nresult.payload)
4333
    else:
4334
      live_data = None
4335

    
4336
    if query.NQ_INST in self.requested_data:
4337
      node_to_primary = dict([(name, set()) for name in nodenames])
4338
      node_to_secondary = dict([(name, set()) for name in nodenames])
4339

    
4340
      inst_data = lu.cfg.GetAllInstancesInfo()
4341

    
4342
      for inst in inst_data.values():
4343
        if inst.primary_node in node_to_primary:
4344
          node_to_primary[inst.primary_node].add(inst.name)
4345
        for secnode in inst.secondary_nodes:
4346
          if secnode in node_to_secondary:
4347
            node_to_secondary[secnode].add(inst.name)
4348
    else:
4349
      node_to_primary = None
4350
      node_to_secondary = None
4351

    
4352
    if query.NQ_OOB in self.requested_data:
4353
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4354
                         for name, node in all_info.iteritems())
4355
    else:
4356
      oob_support = None
4357

    
4358
    if query.NQ_GROUP in self.requested_data:
4359
      groups = lu.cfg.GetAllNodeGroupsInfo()
4360
    else:
4361
      groups = {}
4362

    
4363
    return query.NodeQueryData([all_info[name] for name in nodenames],
4364
                               live_data, lu.cfg.GetMasterNode(),
4365
                               node_to_primary, node_to_secondary, groups,
4366
                               oob_support, lu.cfg.GetClusterInfo())
4367

    
4368

    
4369
class LUNodeQuery(NoHooksLU):
4370
  """Logical unit for querying nodes.
4371

4372
  """
4373
  # pylint: disable-msg=W0142
4374
  REQ_BGL = False
4375

    
4376
  def CheckArguments(self):
4377
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4378
                         self.op.output_fields, self.op.use_locking)
4379

    
4380
  def ExpandNames(self):
4381
    self.nq.ExpandNames(self)
4382

    
4383
  def Exec(self, feedback_fn):
4384
    return self.nq.OldStyleQuery(self)
4385

    
4386

    
4387
class LUNodeQueryvols(NoHooksLU):
4388
  """Logical unit for getting volumes on node(s).
4389

4390
  """
4391
  REQ_BGL = False
4392
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4393
  _FIELDS_STATIC = utils.FieldSet("node")
4394

    
4395
  def CheckArguments(self):
4396
    _CheckOutputFields(static=self._FIELDS_STATIC,
4397
                       dynamic=self._FIELDS_DYNAMIC,
4398
                       selected=self.op.output_fields)
4399

    
4400
  def ExpandNames(self):
4401
    self.needed_locks = {}
4402
    self.share_locks[locking.LEVEL_NODE] = 1
4403
    if not self.op.nodes:
4404
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4405
    else:
4406
      self.needed_locks[locking.LEVEL_NODE] = \
4407
        _GetWantedNodes(self, self.op.nodes)
4408

    
4409
  def Exec(self, feedback_fn):
4410
    """Computes the list of nodes and their attributes.
4411

4412
    """
4413
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4414
    volumes = self.rpc.call_node_volumes(nodenames)
4415

    
4416
    ilist = self.cfg.GetAllInstancesInfo()
4417
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4418

    
4419
    output = []
4420
    for node in nodenames:
4421
      nresult = volumes[node]
4422
      if nresult.offline:
4423
        continue
4424
      msg = nresult.fail_msg
4425
      if msg:
4426
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4427
        continue
4428

    
4429
      node_vols = sorted(nresult.payload,
4430
                         key=operator.itemgetter("dev"))
4431

    
4432
      for vol in node_vols:
4433
        node_output = []
4434
        for field in self.op.output_fields:
4435
          if field == "node":
4436
            val = node
4437
          elif field == "phys":
4438
            val = vol["dev"]
4439
          elif field == "vg":
4440
            val = vol["vg"]
4441
          elif field == "name":
4442
            val = vol["name"]
4443
          elif field == "size":
4444
            val = int(float(vol["size"]))
4445
          elif field == "instance":
4446
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4447
          else:
4448
            raise errors.ParameterError(field)
4449
          node_output.append(str(val))
4450

    
4451
        output.append(node_output)
4452

    
4453
    return output
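    # Illustrative example (hypothetical values): with
    # output_fields=["node", "name", "size"], a single row could look like
    #   ["node1.example.com", "lv-data", "10240"]
    # since every value is passed through str() above.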
4454

    
4455

    
4456
class LUNodeQueryStorage(NoHooksLU):
4457
  """Logical unit for getting information on storage units on node(s).
4458

4459
  """
4460
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4461
  REQ_BGL = False
4462

    
4463
  def CheckArguments(self):
4464
    _CheckOutputFields(static=self._FIELDS_STATIC,
4465
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4466
                       selected=self.op.output_fields)
4467

    
4468
  def ExpandNames(self):
4469
    self.needed_locks = {}
4470
    self.share_locks[locking.LEVEL_NODE] = 1
4471

    
4472
    if self.op.nodes:
4473
      self.needed_locks[locking.LEVEL_NODE] = \
4474
        _GetWantedNodes(self, self.op.nodes)
4475
    else:
4476
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4477

    
4478
  def Exec(self, feedback_fn):
4479
    """Computes the list of nodes and their attributes.
4480

4481
    """
4482
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4483

    
4484
    # Always get name to sort by
4485
    if constants.SF_NAME in self.op.output_fields:
4486
      fields = self.op.output_fields[:]
4487
    else:
4488
      fields = [constants.SF_NAME] + self.op.output_fields
4489

    
4490
    # Never ask for node or type as it's only known to the LU
4491
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4492
      while extra in fields:
4493
        fields.remove(extra)
4494

    
4495
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4496
    name_idx = field_idx[constants.SF_NAME]
4497

    
4498
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4499
    data = self.rpc.call_storage_list(self.nodes,
4500
                                      self.op.storage_type, st_args,
4501
                                      self.op.name, fields)
4502

    
4503
    result = []
4504

    
4505
    for node in utils.NiceSort(self.nodes):
4506
      nresult = data[node]
4507
      if nresult.offline:
4508
        continue
4509

    
4510
      msg = nresult.fail_msg
4511
      if msg:
4512
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4513
        continue
4514

    
4515
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4516

    
4517
      for name in utils.NiceSort(rows.keys()):
4518
        row = rows[name]
4519

    
4520
        out = []
4521

    
4522
        for field in self.op.output_fields:
4523
          if field == constants.SF_NODE:
4524
            val = node
4525
          elif field == constants.SF_TYPE:
4526
            val = self.op.storage_type
4527
          elif field in field_idx:
4528
            val = row[field_idx[field]]
4529
          else:
4530
            raise errors.ParameterError(field)
4531

    
4532
          out.append(val)
4533

    
4534
        result.append(out)
4535

    
4536
    return result
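    # Illustrative example (hypothetical values): for storage_type "lvm-vg"
    # and output_fields=["name", "size", constants.SF_NODE], a row could be
    #   ["xenvg", 102400, "node1.example.com"]
    # where the node (and type) values are filled in locally instead of
    # being requested from the remote node.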
4537

    
4538

    
4539
class _InstanceQuery(_QueryBase):
4540
  FIELDS = query.INSTANCE_FIELDS
4541

    
4542
  def ExpandNames(self, lu):
4543
    lu.needed_locks = {}
4544
    lu.share_locks = _ShareAll()
4545

    
4546
    if self.names:
4547
      self.wanted = _GetWantedInstances(lu, self.names)
4548
    else:
4549
      self.wanted = locking.ALL_SET
4550

    
4551
    self.do_locking = (self.use_locking and
4552
                       query.IQ_LIVE in self.requested_data)
4553
    if self.do_locking:
4554
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4555
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4556
      lu.needed_locks[locking.LEVEL_NODE] = []
4557
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4558

    
4559
    self.do_grouplocks = (self.do_locking and
4560
                          query.IQ_NODES in self.requested_data)
4561

    
4562
  def DeclareLocks(self, lu, level):
4563
    if self.do_locking:
4564
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4565
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4566

    
4567
        # Lock all groups used by instances optimistically; this requires going
4568
        # via the node before it's locked, requiring verification later on
4569
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4570
          set(group_uuid
4571
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4572
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4573
      elif level == locking.LEVEL_NODE:
4574
        lu._LockInstancesNodes() # pylint: disable-msg=W0212
4575

    
4576
  @staticmethod
4577
  def _CheckGroupLocks(lu):
4578
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4579
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4580

    
4581
    # Check if node groups for locked instances are still correct
4582
    for instance_name in owned_instances:
4583
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4584

    
4585
  def _GetQueryData(self, lu):
4586
    """Computes the list of instances and their attributes.
4587

4588
    """
4589
    if self.do_grouplocks:
4590
      self._CheckGroupLocks(lu)
4591

    
4592
    cluster = lu.cfg.GetClusterInfo()
4593
    all_info = lu.cfg.GetAllInstancesInfo()
4594

    
4595
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4596

    
4597
    instance_list = [all_info[name] for name in instance_names]
4598
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4599
                                        for inst in instance_list)))
4600
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4601
    bad_nodes = []
4602
    offline_nodes = []
4603
    wrongnode_inst = set()
4604

    
4605
    # Gather data as requested
4606
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4607
      live_data = {}
4608
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4609
      for name in nodes:
4610
        result = node_data[name]
4611
        if result.offline:
4612
          # offline nodes will be in both lists
4613
          assert result.fail_msg
4614
          offline_nodes.append(name)
4615
        if result.fail_msg:
4616
          bad_nodes.append(name)
4617
        elif result.payload:
4618
          for inst in result.payload:
4619
            if inst in all_info:
4620
              if all_info[inst].primary_node == name:
4621
                live_data.update(result.payload)
4622
              else:
4623
                wrongnode_inst.add(inst)
4624
            else:
4625
              # orphan instance; we don't list it here as we don't
4626
              # handle this case yet in the output of instance listing
4627
              logging.warning("Orphan instance '%s' found on node %s",
4628
                              inst, name)
4629
        # else no instance is alive
4630
    else:
4631
      live_data = {}
4632

    
4633
    if query.IQ_DISKUSAGE in self.requested_data:
4634
      disk_usage = dict((inst.name,
4635
                         _ComputeDiskSize(inst.disk_template,
4636
                                          [{constants.IDISK_SIZE: disk.size}
4637
                                           for disk in inst.disks]))
4638
                        for inst in instance_list)
4639
    else:
4640
      disk_usage = None
4641

    
4642
    if query.IQ_CONSOLE in self.requested_data:
4643
      consinfo = {}
4644
      for inst in instance_list:
4645
        if inst.name in live_data:
4646
          # Instance is running
4647
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4648
        else:
4649
          consinfo[inst.name] = None
4650
      assert set(consinfo.keys()) == set(instance_names)
4651
    else:
4652
      consinfo = None
4653

    
4654
    if query.IQ_NODES in self.requested_data:
4655
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4656
                                            instance_list)))
4657
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4658
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4659
                    for uuid in set(map(operator.attrgetter("group"),
4660
                                        nodes.values())))
4661
    else:
4662
      nodes = None
4663
      groups = None
4664

    
4665
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4666
                                   disk_usage, offline_nodes, bad_nodes,
4667
                                   live_data, wrongnode_inst, consinfo,
4668
                                   nodes, groups)
4669

    
4670

    
4671
class LUQuery(NoHooksLU):
4672
  """Query for resources/items of a certain kind.
4673

4674
  """
4675
  # pylint: disable-msg=W0142
4676
  REQ_BGL = False
4677

    
4678
  def CheckArguments(self):
4679
    qcls = _GetQueryImplementation(self.op.what)
4680

    
4681
    self.impl = qcls(self.op.filter, self.op.fields, False)
4682

    
4683
  def ExpandNames(self):
4684
    self.impl.ExpandNames(self)
4685

    
4686
  def DeclareLocks(self, level):
4687
    self.impl.DeclareLocks(self, level)
4688

    
4689
  def Exec(self, feedback_fn):
4690
    return self.impl.NewStyleQuery(self)
4691

    
4692

    
4693
class LUQueryFields(NoHooksLU):
4694
  """Query for resources/items of a certain kind.
4695

4696
  """
4697
  # pylint: disable-msg=W0142
4698
  REQ_BGL = False
4699

    
4700
  def CheckArguments(self):
4701
    self.qcls = _GetQueryImplementation(self.op.what)
4702

    
4703
  def ExpandNames(self):
4704
    self.needed_locks = {}
4705

    
4706
  def Exec(self, feedback_fn):
4707
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4708

    
4709

    
4710
class LUNodeModifyStorage(NoHooksLU):
4711
  """Logical unit for modifying a storage volume on a node.
4712

4713
  """
4714
  REQ_BGL = False
4715

    
4716
  def CheckArguments(self):
4717
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4718

    
4719
    storage_type = self.op.storage_type
4720

    
4721
    try:
4722
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4723
    except KeyError:
4724
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4725
                                 " modified" % storage_type,
4726
                                 errors.ECODE_INVAL)
4727

    
4728
    diff = set(self.op.changes.keys()) - modifiable
4729
    if diff:
4730
      raise errors.OpPrereqError("The following fields can not be modified for"
4731
                                 " storage units of type '%s': %r" %
4732
                                 (storage_type, list(diff)),
4733
                                 errors.ECODE_INVAL)
4734

    
4735
  def ExpandNames(self):
4736
    self.needed_locks = {
4737
      locking.LEVEL_NODE: self.op.node_name,
4738
      }
4739

    
4740
  def Exec(self, feedback_fn):
4741
    """Computes the list of nodes and their attributes.
4742

4743
    """
4744
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4745
    result = self.rpc.call_storage_modify(self.op.node_name,
4746
                                          self.op.storage_type, st_args,
4747
                                          self.op.name, self.op.changes)
4748
    result.Raise("Failed to modify storage unit '%s' on %s" %
4749
                 (self.op.name, self.op.node_name))
4750

    
4751

    
4752
class LUNodeAdd(LogicalUnit):
4753
  """Logical unit for adding node to the cluster.
4754

4755
  """
4756
  HPATH = "node-add"
4757
  HTYPE = constants.HTYPE_NODE
4758
  _NFLAGS = ["master_capable", "vm_capable"]
4759

    
4760
  def CheckArguments(self):
4761
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4762
    # validate/normalize the node name
4763
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4764
                                         family=self.primary_ip_family)
4765
    self.op.node_name = self.hostname.name
4766

    
4767
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4768
      raise errors.OpPrereqError("Cannot readd the master node",
4769
                                 errors.ECODE_STATE)
4770

    
4771
    if self.op.readd and self.op.group:
4772
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4773
                                 " being readded", errors.ECODE_INVAL)
4774

    
4775
  def BuildHooksEnv(self):
4776
    """Build hooks env.
4777

4778
    This will run on all nodes before, and on all nodes + the new node after.
4779

4780
    """
4781
    return {
4782
      "OP_TARGET": self.op.node_name,
4783
      "NODE_NAME": self.op.node_name,
4784
      "NODE_PIP": self.op.primary_ip,
4785
      "NODE_SIP": self.op.secondary_ip,
4786
      "MASTER_CAPABLE": str(self.op.master_capable),
4787
      "VM_CAPABLE": str(self.op.vm_capable),
4788
      }
4789

    
4790
  def BuildHooksNodes(self):
4791
    """Build hooks nodes.
4792

4793
    """
4794
    # Exclude added node
4795
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4796
    post_nodes = pre_nodes + [self.op.node_name, ]
4797

    
4798
    return (pre_nodes, post_nodes)
4799

    
4800
  def CheckPrereq(self):
4801
    """Check prerequisites.
4802

4803
    This checks:
4804
     - the new node is not already in the config
4805
     - it is resolvable
4806
     - its parameters (single/dual homed) matches the cluster
4807

4808
    Any errors are signaled by raising errors.OpPrereqError.
4809

4810
    """
4811
    cfg = self.cfg
4812
    hostname = self.hostname
4813
    node = hostname.name
4814
    primary_ip = self.op.primary_ip = hostname.ip
4815
    if self.op.secondary_ip is None:
4816
      if self.primary_ip_family == netutils.IP6Address.family:
4817
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4818
                                   " IPv4 address must be given as secondary",
4819
                                   errors.ECODE_INVAL)
4820
      self.op.secondary_ip = primary_ip
4821

    
4822
    secondary_ip = self.op.secondary_ip
4823
    if not netutils.IP4Address.IsValid(secondary_ip):
4824
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4825
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4826

    
4827
    node_list = cfg.GetNodeList()
4828
    if not self.op.readd and node in node_list:
4829
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4830
                                 node, errors.ECODE_EXISTS)
4831
    elif self.op.readd and node not in node_list:
4832
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4833
                                 errors.ECODE_NOENT)
4834

    
4835
    self.changed_primary_ip = False
4836

    
4837
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4838
      if self.op.readd and node == existing_node_name:
4839
        if existing_node.secondary_ip != secondary_ip:
4840
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4841
                                     " address configuration as before",
4842
                                     errors.ECODE_INVAL)
4843
        if existing_node.primary_ip != primary_ip:
4844
          self.changed_primary_ip = True
4845

    
4846
        continue
4847

    
4848
      if (existing_node.primary_ip == primary_ip or
4849
          existing_node.secondary_ip == primary_ip or
4850
          existing_node.primary_ip == secondary_ip or
4851
          existing_node.secondary_ip == secondary_ip):
4852
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4853
                                   " existing node %s" % existing_node.name,
4854
                                   errors.ECODE_NOTUNIQUE)
4855

    
4856
    # After this 'if' block, None is no longer a valid value for the
4857
    # _capable op attributes
4858
    if self.op.readd:
4859
      old_node = self.cfg.GetNodeInfo(node)
4860
      assert old_node is not None, "Can't retrieve locked node %s" % node
4861
      for attr in self._NFLAGS:
4862
        if getattr(self.op, attr) is None:
4863
          setattr(self.op, attr, getattr(old_node, attr))
4864
    else:
4865
      for attr in self._NFLAGS:
4866
        if getattr(self.op, attr) is None:
4867
          setattr(self.op, attr, True)
4868

    
4869
    if self.op.readd and not self.op.vm_capable:
4870
      pri, sec = cfg.GetNodeInstances(node)
4871
      if pri or sec:
4872
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4873
                                   " flag set to false, but it already holds"
4874
                                   " instances" % node,
4875
                                   errors.ECODE_STATE)
4876

    
4877
    # check that the type of the node (single versus dual homed) is the
4878
    # same as for the master
4879
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4880
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4881
    newbie_singlehomed = secondary_ip == primary_ip
4882
    if master_singlehomed != newbie_singlehomed:
4883
      if master_singlehomed:
4884
        raise errors.OpPrereqError("The master has no secondary ip but the"
4885
                                   " new node has one",
4886
                                   errors.ECODE_INVAL)
4887
      else:
4888
        raise errors.OpPrereqError("The master has a secondary ip but the"
4889
                                   " new node doesn't have one",
4890
                                   errors.ECODE_INVAL)
4891

    
4892
    # checks reachability
4893
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4894
      raise errors.OpPrereqError("Node not reachable by ping",
4895
                                 errors.ECODE_ENVIRON)
4896

    
4897
    if not newbie_singlehomed:
4898
      # check reachability from my secondary ip to newbie's secondary ip
4899
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4900
                              source=myself.secondary_ip):
4901
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4902
                                   " based ping to node daemon port",
4903
                                   errors.ECODE_ENVIRON)
4904

    
4905
    if self.op.readd:
4906
      exceptions = [node]
4907
    else:
4908
      exceptions = []
4909

    
4910
    if self.op.master_capable:
4911
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4912
    else:
4913
      self.master_candidate = False
4914

    
4915
    if self.op.readd:
4916
      self.new_node = old_node
4917
    else:
4918
      node_group = cfg.LookupNodeGroup(self.op.group)
4919
      self.new_node = objects.Node(name=node,
4920
                                   primary_ip=primary_ip,
4921
                                   secondary_ip=secondary_ip,
4922
                                   master_candidate=self.master_candidate,
4923
                                   offline=False, drained=False,
4924
                                   group=node_group)
4925

    
4926
    if self.op.ndparams:
4927
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4928

    
4929
  def Exec(self, feedback_fn):
4930
    """Adds the new node to the cluster.
4931

4932
    """
4933
    new_node = self.new_node
4934
    node = new_node.name
4935

    
4936
    # We are adding a new node, so we assume it's powered
4937
    new_node.powered = True
4938

    
4939
    # for re-adds, reset the offline/drained/master-candidate flags;
4940
    # we need to reset here, otherwise offline would prevent RPC calls
4941
    # later in the procedure; this also means that if the re-add
4942
    # fails, we are left with a non-offlined, broken node
4943
    if self.op.readd:
4944
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4945
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4946
      # if we demote the node, we do cleanup later in the procedure
4947
      new_node.master_candidate = self.master_candidate
4948
      if self.changed_primary_ip:
4949
        new_node.primary_ip = self.op.primary_ip
4950

    
4951
    # copy the master/vm_capable flags
4952
    for attr in self._NFLAGS:
4953
      setattr(new_node, attr, getattr(self.op, attr))
4954

    
4955
    # notify the user about any possible mc promotion
4956
    if new_node.master_candidate:
4957
      self.LogInfo("Node will be a master candidate")
4958

    
4959
    if self.op.ndparams:
4960
      new_node.ndparams = self.op.ndparams
4961
    else:
4962
      new_node.ndparams = {}
4963

    
4964
    # check connectivity
4965
    result = self.rpc.call_version([node])[node]
4966
    result.Raise("Can't get version information from node %s" % node)
4967
    if constants.PROTOCOL_VERSION == result.payload:
4968
      logging.info("Communication to node %s fine, sw version %s match",
4969
                   node, result.payload)
4970
    else:
4971
      raise errors.OpExecError("Version mismatch master version %s,"
4972
                               " node version %s" %
4973
                               (constants.PROTOCOL_VERSION, result.payload))
4974

    
4975
    # Add node to our /etc/hosts, and add key to known_hosts
4976
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4977
      master_node = self.cfg.GetMasterNode()
4978
      result = self.rpc.call_etc_hosts_modify(master_node,
4979
                                              constants.ETC_HOSTS_ADD,
4980
                                              self.hostname.name,
4981
                                              self.hostname.ip)
4982
      result.Raise("Can't update hosts file with new host data")
4983

    
4984
    if new_node.secondary_ip != new_node.primary_ip:
4985
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4986
                               False)
4987

    
4988
    node_verify_list = [self.cfg.GetMasterNode()]
4989
    node_verify_param = {
4990
      constants.NV_NODELIST: [node],
4991
      # TODO: do a node-net-test as well?
4992
    }
4993

    
4994
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4995
                                       self.cfg.GetClusterName())
4996
    for verifier in node_verify_list:
4997
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
4998
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
4999
      if nl_payload:
5000
        for failed in nl_payload:
5001
          feedback_fn("ssh/hostname verification failed"
5002
                      " (checking from %s): %s" %
5003
                      (verifier, nl_payload[failed]))
5004
        raise errors.OpExecError("ssh/hostname verification failed")
5005

    
5006
    if self.op.readd:
5007
      _RedistributeAncillaryFiles(self)
5008
      self.context.ReaddNode(new_node)
5009
      # make sure we redistribute the config
5010
      self.cfg.Update(new_node, feedback_fn)
5011
      # and make sure the new node will not have old files around
5012
      if not new_node.master_candidate:
5013
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5014
        msg = result.fail_msg
5015
        if msg:
5016
          self.LogWarning("Node failed to demote itself from master"
5017
                          " candidate status: %s" % msg)
5018
    else:
5019
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5020
                                  additional_vm=self.op.vm_capable)
5021
      self.context.AddNode(new_node, self.proc.GetECId())
5022

    
5023

    
5024
class LUNodeSetParams(LogicalUnit):
5025
  """Modifies the parameters of a node.
5026

5027
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5028
      to the node role (as _ROLE_*)
5029
  @cvar _R2F: a dictionary from node role to tuples of flags
5030
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5031

5032
  """
5033
  HPATH = "node-modify"
5034
  HTYPE = constants.HTYPE_NODE
5035
  REQ_BGL = False
5036
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5037
  _F2R = {
5038
    (True, False, False): _ROLE_CANDIDATE,
5039
    (False, True, False): _ROLE_DRAINED,
5040
    (False, False, True): _ROLE_OFFLINE,
5041
    (False, False, False): _ROLE_REGULAR,
5042
    }
5043
  _R2F = dict((v, k) for k, v in _F2R.items())
5044
  _FLAGS = ["master_candidate", "drained", "offline"]
5045

    
5046
  def CheckArguments(self):
5047
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5048
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5049
                self.op.master_capable, self.op.vm_capable,
5050
                self.op.secondary_ip, self.op.ndparams]
5051
    if all_mods.count(None) == len(all_mods):
5052
      raise errors.OpPrereqError("Please pass at least one modification",
5053
                                 errors.ECODE_INVAL)
5054
    if all_mods.count(True) > 1:
5055
      raise errors.OpPrereqError("Can't set the node into more than one"
5056
                                 " state at the same time",
5057
                                 errors.ECODE_INVAL)
5058

    
5059
    # Boolean value that tells us whether we might be demoting from MC
5060
    self.might_demote = (self.op.master_candidate == False or
5061
                         self.op.offline == True or
5062
                         self.op.drained == True or
5063
                         self.op.master_capable == False)
5064

    
5065
    if self.op.secondary_ip:
5066
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5067
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5068
                                   " address" % self.op.secondary_ip,
5069
                                   errors.ECODE_INVAL)
5070

    
5071
    self.lock_all = self.op.auto_promote and self.might_demote
5072
    self.lock_instances = self.op.secondary_ip is not None
5073

    
5074
  def ExpandNames(self):
5075
    if self.lock_all:
5076
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5077
    else:
5078
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5079

    
5080
    if self.lock_instances:
5081
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5082

    
5083
  def DeclareLocks(self, level):
5084
    # If we have locked all instances, before waiting to lock nodes, release
5085
    # all the ones living on nodes unrelated to the current operation.
5086
    if level == locking.LEVEL_NODE and self.lock_instances:
5087
      self.affected_instances = []
5088
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5089
        instances_keep = []
5090

    
5091
        # Build list of instances to release
5092
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5093
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5094
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5095
              self.op.node_name in instance.all_nodes):
5096
            instances_keep.append(instance_name)
5097
            self.affected_instances.append(instance)
5098

    
5099
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5100

    
5101
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5102
                set(instances_keep))
5103

    
5104
  def BuildHooksEnv(self):
5105
    """Build hooks env.
5106

5107
    This runs on the master node.
5108

5109
    """
5110
    return {
5111
      "OP_TARGET": self.op.node_name,
5112
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5113
      "OFFLINE": str(self.op.offline),
5114
      "DRAINED": str(self.op.drained),
5115
      "MASTER_CAPABLE": str(self.op.master_capable),
5116
      "VM_CAPABLE": str(self.op.vm_capable),
5117
      }
5118

    
5119
  def BuildHooksNodes(self):
5120
    """Build hooks nodes.
5121

5122
    """
5123
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5124
    return (nl, nl)
5125

    
5126
  def CheckPrereq(self):
5127
    """Check prerequisites.
5128

5129
    This only checks the instance list against the existing names.
5130

5131
    """
5132
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5133

    
5134
    if (self.op.master_candidate is not None or
5135
        self.op.drained is not None or
5136
        self.op.offline is not None):
5137
      # we can't change the master's node flags
5138
      if self.op.node_name == self.cfg.GetMasterNode():
5139
        raise errors.OpPrereqError("The master role can be changed"
5140
                                   " only via master-failover",
5141
                                   errors.ECODE_INVAL)
5142

    
5143
    if self.op.master_candidate and not node.master_capable:
5144
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5145
                                 " it a master candidate" % node.name,
5146
                                 errors.ECODE_STATE)
5147

    
5148
    if self.op.vm_capable == False:
5149
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5150
      if ipri or isec:
5151
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5152
                                   " the vm_capable flag" % node.name,
5153
                                   errors.ECODE_STATE)
5154

    
5155
    if node.master_candidate and self.might_demote and not self.lock_all:
5156
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5157
      # check if after removing the current node, we're missing master
5158
      # candidates
5159
      (mc_remaining, mc_should, _) = \
5160
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5161
      if mc_remaining < mc_should:
5162
        raise errors.OpPrereqError("Not enough master candidates, please"
5163
                                   " pass auto promote option to allow"
5164
                                   " promotion", errors.ECODE_STATE)
5165

    
5166
    self.old_flags = old_flags = (node.master_candidate,
5167
                                  node.drained, node.offline)
5168
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5169
    self.old_role = old_role = self._F2R[old_flags]
5170

    
5171
    # Check for ineffective changes
5172
    for attr in self._FLAGS:
5173
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5174
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5175
        setattr(self.op, attr, None)
5176

    
5177
    # Past this point, any flag change to False means a transition
5178
    # away from the respective state, as only real changes are kept
5179

    
5180
    # TODO: We might query the real power state if it supports OOB
5181
    if _SupportsOob(self.cfg, node):
5182
      if self.op.offline is False and not (node.powered or
5183
                                           self.op.powered == True):
5184
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5185
                                    " offline status can be reset") %
5186
                                   self.op.node_name)
5187
    elif self.op.powered is not None:
5188
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5189
                                  " as it does not support out-of-band"
5190
                                  " handling") % self.op.node_name)
5191

    
5192
    # If we're being de-offlined or un-drained, we'll promote to MC if needed
5193
    if (self.op.drained == False or self.op.offline == False or
5194
        (self.op.master_capable and not node.master_capable)):
5195
      if _DecideSelfPromotion(self):
5196
        self.op.master_candidate = True
5197
        self.LogInfo("Auto-promoting node to master candidate")
5198

    
5199
    # If we're no longer master capable, we'll demote ourselves from MC
5200
    if self.op.master_capable == False and node.master_candidate:
5201
      self.LogInfo("Demoting from master candidate")
5202
      self.op.master_candidate = False
5203

    
5204
    # Compute new role
5205
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5206
    if self.op.master_candidate:
5207
      new_role = self._ROLE_CANDIDATE
5208
    elif self.op.drained:
5209
      new_role = self._ROLE_DRAINED
5210
    elif self.op.offline:
5211
      new_role = self._ROLE_OFFLINE
5212
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5213
      # False is still in new flags, which means we're un-setting (the
5214
      # only) True flag
5215
      new_role = self._ROLE_REGULAR
5216
    else: # no new flags, nothing, keep old role
5217
      new_role = old_role
5218

    
5219
    self.new_role = new_role
5220

    
5221
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5222
      # Trying to transition out of offline status
5223
      result = self.rpc.call_version([node.name])[node.name]
5224
      if result.fail_msg:
5225
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5226
                                   " to report its version: %s" %
5227
                                   (node.name, result.fail_msg),
5228
                                   errors.ECODE_STATE)
5229
      else:
5230
        self.LogWarning("Transitioning node from offline to online state"
5231
                        " without using re-add. Please make sure the node"
5232
                        " is healthy!")
5233

    
5234
    if self.op.secondary_ip:
5235
      # Ok even without locking, because this can't be changed by any LU
5236
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5237
      master_singlehomed = master.secondary_ip == master.primary_ip
5238
      if master_singlehomed and self.op.secondary_ip:
5239
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5240
                                   " homed cluster", errors.ECODE_INVAL)
5241

    
5242
      if node.offline:
5243
        if self.affected_instances:
5244
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5245
                                     " node has instances (%s) configured"
5246
                                     " to use it" % self.affected_instances)
5247
      else:
5248
        # On online nodes, check that no instances are running, and that
5249
        # the node has the new ip and we can reach it.
5250
        for instance in self.affected_instances:
5251
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5252

    
5253
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5254
        if master.name != node.name:
5255
          # check reachability from master secondary ip to new secondary ip
5256
          if not netutils.TcpPing(self.op.secondary_ip,
5257
                                  constants.DEFAULT_NODED_PORT,
5258
                                  source=master.secondary_ip):
5259
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5260
                                       " based ping to node daemon port",
5261
                                       errors.ECODE_ENVIRON)
5262

    
5263
    if self.op.ndparams:
5264
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5265
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5266
      self.new_ndparams = new_ndparams
5267

    
5268
  def Exec(self, feedback_fn):
5269
    """Modifies a node.
5270

5271
    """
5272
    node = self.node
5273
    old_role = self.old_role
5274
    new_role = self.new_role
5275

    
5276
    result = []
5277

    
5278
    if self.op.ndparams:
5279
      node.ndparams = self.new_ndparams
5280

    
5281
    if self.op.powered is not None:
5282
      node.powered = self.op.powered
5283

    
5284
    for attr in ["master_capable", "vm_capable"]:
5285
      val = getattr(self.op, attr)
5286
      if val is not None:
5287
        setattr(node, attr, val)
5288
        result.append((attr, str(val)))
5289

    
5290
    if new_role != old_role:
5291
      # Tell the node to demote itself, if no longer MC and not offline
5292
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5293
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5294
        if msg:
5295
          self.LogWarning("Node failed to demote itself: %s", msg)
5296

    
5297
      new_flags = self._R2F[new_role]
5298
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5299
        if of != nf:
5300
          result.append((desc, str(nf)))
5301
      (node.master_candidate, node.drained, node.offline) = new_flags
5302

    
5303
      # we locked all nodes, we adjust the CP before updating this node
5304
      if self.lock_all:
5305
        _AdjustCandidatePool(self, [node.name])
5306

    
5307
    if self.op.secondary_ip:
5308
      node.secondary_ip = self.op.secondary_ip
5309
      result.append(("secondary_ip", self.op.secondary_ip))
5310

    
5311
    # this will trigger configuration file update, if needed
5312
    self.cfg.Update(node, feedback_fn)
5313

    
5314
    # this will trigger job queue propagation or cleanup if the mc
5315
    # flag changed
5316
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5317
      self.context.ReaddNode(node)
5318

    
5319
    return result
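    # Illustrative return value (hypothetical): something like
    #   [("master_candidate", "False"), ("drained", "True"),
    #    ("secondary_ip", "192.0.2.10")]
    # i.e. only the attributes that actually changed are reported.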
5320

    
5321

    
5322
class LUNodePowercycle(NoHooksLU):
5323
  """Powercycles a node.
5324

5325
  """
5326
  REQ_BGL = False
5327

    
5328
  def CheckArguments(self):
5329
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5330
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5331
      raise errors.OpPrereqError("The node is the master and the force"
5332
                                 " parameter was not set",
5333
                                 errors.ECODE_INVAL)
5334

    
5335
  def ExpandNames(self):
5336
    """Locking for PowercycleNode.
5337

5338
    This is a last-resort option and shouldn't block on other
5339
    jobs. Therefore, we grab no locks.
5340

5341
    """
5342
    self.needed_locks = {}
5343

    
5344
  def Exec(self, feedback_fn):
5345
    """Reboots a node.
5346

5347
    """
5348
    result = self.rpc.call_node_powercycle(self.op.node_name,
5349
                                           self.cfg.GetHypervisorType())
5350
    result.Raise("Failed to schedule the reboot")
5351
    return result.payload
5352

    
5353

    
5354
class LUClusterQuery(NoHooksLU):
5355
  """Query cluster configuration.
5356

5357
  """
5358
  REQ_BGL = False
5359

    
5360
  def ExpandNames(self):
5361
    self.needed_locks = {}
5362

    
5363
  def Exec(self, feedback_fn):
5364
    """Return cluster config.
5365

5366
    """
5367
    cluster = self.cfg.GetClusterInfo()
5368
    os_hvp = {}
5369

    
5370
    # Filter just for enabled hypervisors
5371
    for os_name, hv_dict in cluster.os_hvp.items():
5372
      os_hvp[os_name] = {}
5373
      for hv_name, hv_params in hv_dict.items():
5374
        if hv_name in cluster.enabled_hypervisors:
5375
          os_hvp[os_name][hv_name] = hv_params
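    # Illustrative shape of the result (hypothetical names): with only "kvm"
    # enabled, os_hvp could end up as
    #   {"dummy-os": {"kvm": {"kernel_path": "/boot/vmlinuz"}}}
    # i.e. per-OS settings for disabled hypervisors are dropped.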
5376

    
5377
    # Convert ip_family to ip_version
5378
    primary_ip_version = constants.IP4_VERSION
5379
    if cluster.primary_ip_family == netutils.IP6Address.family:
5380
      primary_ip_version = constants.IP6_VERSION
5381

    
5382
    result = {
5383
      "software_version": constants.RELEASE_VERSION,
5384
      "protocol_version": constants.PROTOCOL_VERSION,
5385
      "config_version": constants.CONFIG_VERSION,
5386
      "os_api_version": max(constants.OS_API_VERSIONS),
5387
      "export_version": constants.EXPORT_VERSION,
5388
      "architecture": (platform.architecture()[0], platform.machine()),
5389
      "name": cluster.cluster_name,
5390
      "master": cluster.master_node,
5391
      "default_hypervisor": cluster.enabled_hypervisors[0],
5392
      "enabled_hypervisors": cluster.enabled_hypervisors,
5393
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5394
                        for hypervisor_name in cluster.enabled_hypervisors]),
5395
      "os_hvp": os_hvp,
5396
      "beparams": cluster.beparams,
5397
      "osparams": cluster.osparams,
5398
      "nicparams": cluster.nicparams,
5399
      "ndparams": cluster.ndparams,
5400
      "candidate_pool_size": cluster.candidate_pool_size,
5401
      "master_netdev": cluster.master_netdev,
5402
      "volume_group_name": cluster.volume_group_name,
5403
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5404
      "file_storage_dir": cluster.file_storage_dir,
5405
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5406
      "maintain_node_health": cluster.maintain_node_health,
5407
      "ctime": cluster.ctime,
5408
      "mtime": cluster.mtime,
5409
      "uuid": cluster.uuid,
5410
      "tags": list(cluster.GetTags()),
5411
      "uid_pool": cluster.uid_pool,
5412
      "default_iallocator": cluster.default_iallocator,
5413
      "reserved_lvs": cluster.reserved_lvs,
5414
      "primary_ip_version": primary_ip_version,
5415
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5416
      "hidden_os": cluster.hidden_os,
5417
      "blacklisted_os": cluster.blacklisted_os,
5418
      }
5419

    
5420
    return result
5421

    
5422

    
5423
class LUClusterConfigQuery(NoHooksLU):
5424
  """Return configuration values.
5425

5426
  """
5427
  REQ_BGL = False
5428
  _FIELDS_DYNAMIC = utils.FieldSet()
5429
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5430
                                  "watcher_pause", "volume_group_name")
5431

    
5432
  def CheckArguments(self):
5433
    _CheckOutputFields(static=self._FIELDS_STATIC,
5434
                       dynamic=self._FIELDS_DYNAMIC,
5435
                       selected=self.op.output_fields)
5436

    
5437
  def ExpandNames(self):
5438
    self.needed_locks = {}
5439

    
5440
  def Exec(self, feedback_fn):
5441
    """Dump a representation of the cluster config to the standard output.
5442

5443
    """
5444
    values = []
5445
    for field in self.op.output_fields:
5446
      if field == "cluster_name":
5447
        entry = self.cfg.GetClusterName()
5448
      elif field == "master_node":
5449
        entry = self.cfg.GetMasterNode()
5450
      elif field == "drain_flag":
5451
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5452
      elif field == "watcher_pause":
5453
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5454
      elif field == "volume_group_name":
5455
        entry = self.cfg.GetVGName()
5456
      else:
5457
        raise errors.ParameterError(field)
5458
      values.append(entry)
5459
    return values
5460

    
5461

    
5462
class LUInstanceActivateDisks(NoHooksLU):
5463
  """Bring up an instance's disks.
5464

5465
  """
5466
  REQ_BGL = False
5467

    
5468
  def ExpandNames(self):
5469
    self._ExpandAndLockInstance()
5470
    self.needed_locks[locking.LEVEL_NODE] = []
5471
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5472

    
5473
  def DeclareLocks(self, level):
5474
    if level == locking.LEVEL_NODE:
5475
      self._LockInstancesNodes()
5476

    
5477
  def CheckPrereq(self):
5478
    """Check prerequisites.
5479

5480
    This checks that the instance is in the cluster.
5481

5482
    """
5483
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5484
    assert self.instance is not None, \
5485
      "Cannot retrieve locked instance %s" % self.op.instance_name
5486
    _CheckNodeOnline(self, self.instance.primary_node)
5487

    
5488
  def Exec(self, feedback_fn):
5489
    """Activate the disks.
5490

5491
    """
5492
    disks_ok, disks_info = \
5493
              _AssembleInstanceDisks(self, self.instance,
5494
                                     ignore_size=self.op.ignore_size)
5495
    if not disks_ok:
5496
      raise errors.OpExecError("Cannot activate block devices")
5497

    
5498
    return disks_info
5499

    
5500

    
5501
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5502
                           ignore_size=False):
5503
  """Prepare the block devices for an instance.
5504

5505
  This sets up the block devices on all nodes.
5506

5507
  @type lu: L{LogicalUnit}
5508
  @param lu: the logical unit on whose behalf we execute
5509
  @type instance: L{objects.Instance}
5510
  @param instance: the instance for whose disks we assemble
5511
  @type disks: list of L{objects.Disk} or None
5512
  @param disks: which disks to assemble (or all, if None)
5513
  @type ignore_secondaries: boolean
5514
  @param ignore_secondaries: if true, errors on secondary nodes
5515
      won't result in an error return from the function
5516
  @type ignore_size: boolean
5517
  @param ignore_size: if true, the current known size of the disk
5518
      will not be used during the disk activation, useful for cases
5519
      when the size is wrong
5520
  @return: a tuple of (disks_ok, device_info), where device_info is a list of
5521
      (host, instance_visible_name, node_visible_name) tuples
5522
      with the mapping from node devices to instance devices
5523

5524
  """
5525
  device_info = []
5526
  disks_ok = True
5527
  iname = instance.name
5528
  disks = _ExpandCheckDisks(instance, disks)
5529

    
5530
  # With the two-pass mechanism we try to reduce the window of
5531
  # opportunity for the race condition of switching DRBD to primary
5532
  # before handshaking occurred, but we do not eliminate it
5533

    
5534
  # The proper fix would be to wait (with some limits) until the
5535
  # connection has been made and drbd transitions from WFConnection
5536
  # into any other network-connected state (Connected, SyncTarget,
5537
  # SyncSource, etc.)
5538

    
5539
  # 1st pass, assemble on all nodes in secondary mode
5540
  for idx, inst_disk in enumerate(disks):
5541
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5542
      if ignore_size:
5543
        node_disk = node_disk.Copy()
5544
        node_disk.UnsetSize()
5545
      lu.cfg.SetDiskID(node_disk, node)
5546
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5547
      msg = result.fail_msg
5548
      if msg:
5549
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5550
                           " (is_primary=False, pass=1): %s",
5551
                           inst_disk.iv_name, node, msg)
5552
        if not ignore_secondaries:
5553
          disks_ok = False
5554

    
5555
  # FIXME: race condition on drbd migration to primary
5556

    
5557
  # 2nd pass, do only the primary node
5558
  for idx, inst_disk in enumerate(disks):
5559
    dev_path = None
5560

    
5561
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5562
      if node != instance.primary_node:
5563
        continue
5564
      if ignore_size:
5565
        node_disk = node_disk.Copy()
5566
        node_disk.UnsetSize()
5567
      lu.cfg.SetDiskID(node_disk, node)
5568
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5569
      msg = result.fail_msg
5570
      if msg:
5571
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5572
                           " (is_primary=True, pass=2): %s",
5573
                           inst_disk.iv_name, node, msg)
5574
        disks_ok = False
5575
      else:
5576
        dev_path = result.payload
5577

    
5578
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5579

    
5580
  # leave the disks configured for the primary node
5581
  # this is a workaround that would be fixed better by
5582
  # improving the logical/physical id handling
5583
  for disk in disks:
5584
    lu.cfg.SetDiskID(disk, instance.primary_node)
5585

    
5586
  return disks_ok, device_info
5587

    
5588

    
5589
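# Illustrative sketch, not part of the original module: it shows how callers
# of _AssembleInstanceDisks above typically consume its (disks_ok,
# device_info) result. "feedback_fn" is an assumed stand-in for whatever
# reporting callback the calling LU has available.
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     feedback_fn("disk %s on %s is visible as %s" % (iv_name, node, dev_path))

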
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                       ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


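# Illustrative sketch, not part of the original module: deactivating the disks
# of a running instance is refused by _SafeShutdownInstanceDisks below, so a
# caller has to opt in explicitly through the force flag of the opcode handled
# by LUInstanceDeactivateDisks above (field name taken from self.op.force):
#
#   opcodes.OpInstanceDeactivateDisks(instance_name="inst1.example.com",
#                                     force=True)

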
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks that the instance is not running before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are ignored only if ignore_primary is true.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


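# Illustrative sketch, not part of the original module: _ShutdownInstanceDisks
# reports failures through its boolean return value rather than by raising, so
# callers that must not continue after a partial shutdown check it themselves:
#
#   if not _ShutdownInstanceDisks(lu, instance):
#     raise errors.OpExecError("Could not shut down the instance's disks")

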
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


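# Illustrative sketch, not part of the original module: a typical call from an
# LU, verifying before startup that the primary node can hold the instance's
# configured memory; the helper raises OpPrereqError itself, so there is no
# return value to check:
#
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)

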
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the dict mapping volume group names to the amount of
      disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


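# Illustrative sketch, not part of the original module (node and volume group
# names are made up): req_sizes maps each volume group to the space that must
# be free on every listed node, so
#
#   _CheckNodesFreeDiskPerVG(self, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 10240})
#
# verifies that both nodes have at least 10 GiB free in the "xenvg" volume
# group and raises OpPrereqError otherwise.

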
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
5793
  """Starts an instance.
5794

5795
  """
5796
  HPATH = "instance-start"
5797
  HTYPE = constants.HTYPE_INSTANCE
5798
  REQ_BGL = False
5799

    
5800
  def CheckArguments(self):
5801
    # extra beparams
5802
    if self.op.beparams:
5803
      # fill the beparams dict
5804
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5805

    
5806
  def ExpandNames(self):
5807
    self._ExpandAndLockInstance()
5808

    
5809
  def BuildHooksEnv(self):
5810
    """Build hooks env.
5811

5812
    This runs on master, primary and secondary nodes of the instance.
5813

5814
    """
5815
    env = {
5816
      "FORCE": self.op.force,
5817
      }
5818

    
5819
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5820

    
5821
    return env
5822

    
5823
  def BuildHooksNodes(self):
5824
    """Build hooks nodes.
5825

5826
    """
5827
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5828
    return (nl, nl)
5829

    
5830
  def CheckPrereq(self):
5831
    """Check prerequisites.
5832

5833
    This checks that the instance is in the cluster.
5834

5835
    """
5836
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5837
    assert self.instance is not None, \
5838
      "Cannot retrieve locked instance %s" % self.op.instance_name
5839

    
5840
    # extra hvparams
5841
    if self.op.hvparams:
5842
      # check hypervisor parameter syntax (locally)
5843
      cluster = self.cfg.GetClusterInfo()
5844
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5845
      filled_hvp = cluster.FillHV(instance)
5846
      filled_hvp.update(self.op.hvparams)
5847
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5848
      hv_type.CheckParameterSyntax(filled_hvp)
5849
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5850

    
5851
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5852

    
5853
    if self.primary_offline and self.op.ignore_offline_nodes:
5854
      self.proc.LogWarning("Ignoring offline primary node")
5855

    
5856
      if self.op.hvparams or self.op.beparams:
5857
        self.proc.LogWarning("Overridden parameters are ignored")
5858
    else:
5859
      _CheckNodeOnline(self, instance.primary_node)
5860

    
5861
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5862

    
5863
      # check bridges existence
5864
      _CheckInstanceBridgesExist(self, instance)
5865

    
5866
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5867
                                                instance.name,
5868
                                                instance.hypervisor)
5869
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5870
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5871
      if not remote_info.payload: # not running already
5872
        _CheckNodeFreeMemory(self, instance.primary_node,
5873
                             "starting instance %s" % instance.name,
5874
                             bep[constants.BE_MEMORY], instance.hypervisor)
5875

    
5876
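  # Illustrative sketch, not part of the original module: the hvparams and
  # beparams accepted by LUInstanceStartup are passed to call_instance_start
  # as per-invocation overrides (the beparams value below is only an example):
  #
  #   opcodes.OpInstanceStartup(instance_name="inst1.example.com",
  #                             beparams={constants.BE_MEMORY: 2048})
  #
  # As CheckArguments and CheckPrereq above show, the overrides are
  # type-checked and, for hvparams, validated against the hypervisor before
  # the instance is started.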
  def Exec(self, feedback_fn):
5877
    """Start the instance.
5878

5879
    """
5880
    instance = self.instance
5881
    force = self.op.force
5882

    
5883
    if not self.op.no_remember:
5884
      self.cfg.MarkInstanceUp(instance.name)
5885

    
5886
    if self.primary_offline:
5887
      assert self.op.ignore_offline_nodes
5888
      self.proc.LogInfo("Primary node offline, marked instance as started")
5889
    else:
5890
      node_current = instance.primary_node
5891

    
5892
      _StartInstanceDisks(self, instance, force)
5893

    
5894
      result = self.rpc.call_instance_start(node_current, instance,
5895
                                            self.op.hvparams, self.op.beparams,
5896
                                            self.op.startup_paused)
5897
      msg = result.fail_msg
5898
      if msg:
5899
        _ShutdownInstanceDisks(self, instance)
5900
        raise errors.OpExecError("Could not start instance: %s" % msg)
5901

    
5902

    
5903
class LUInstanceReboot(LogicalUnit):
5904
  """Reboot an instance.
5905

5906
  """
5907
  HPATH = "instance-reboot"
5908
  HTYPE = constants.HTYPE_INSTANCE
5909
  REQ_BGL = False
5910

    
5911
  def ExpandNames(self):
5912
    self._ExpandAndLockInstance()
5913

    
5914
  def BuildHooksEnv(self):
5915
    """Build hooks env.
5916

5917
    This runs on master, primary and secondary nodes of the instance.
5918

5919
    """
5920
    env = {
5921
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5922
      "REBOOT_TYPE": self.op.reboot_type,
5923
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5924
      }
5925

    
5926
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5927

    
5928
    return env
5929

    
5930
  def BuildHooksNodes(self):
5931
    """Build hooks nodes.
5932

5933
    """
5934
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5935
    return (nl, nl)
5936

    
5937
  def CheckPrereq(self):
5938
    """Check prerequisites.
5939

5940
    This checks that the instance is in the cluster.
5941

5942
    """
5943
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5944
    assert self.instance is not None, \
5945
      "Cannot retrieve locked instance %s" % self.op.instance_name
5946

    
5947
    _CheckNodeOnline(self, instance.primary_node)
5948

    
5949
    # check bridges existence
5950
    _CheckInstanceBridgesExist(self, instance)
5951

    
5952
  def Exec(self, feedback_fn):
5953
    """Reboot the instance.
5954

5955
    """
5956
    instance = self.instance
5957
    ignore_secondaries = self.op.ignore_secondaries
5958
    reboot_type = self.op.reboot_type
5959

    
5960
    remote_info = self.rpc.call_instance_info(instance.primary_node,
5961
                                              instance.name,
5962
                                              instance.hypervisor)
5963
    remote_info.Raise("Error checking node %s" % instance.primary_node)
5964
    instance_running = bool(remote_info.payload)
5965

    
5966
    node_current = instance.primary_node
5967

    
5968
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5969
                                            constants.INSTANCE_REBOOT_HARD]:
5970
      for disk in instance.disks:
5971
        self.cfg.SetDiskID(disk, node_current)
5972
      result = self.rpc.call_instance_reboot(node_current, instance,
5973
                                             reboot_type,
5974
                                             self.op.shutdown_timeout)
5975
      result.Raise("Could not reboot instance")
5976
    else:
5977
      if instance_running:
5978
        result = self.rpc.call_instance_shutdown(node_current, instance,
5979
                                                 self.op.shutdown_timeout)
5980
        result.Raise("Could not shutdown instance for full reboot")
5981
        _ShutdownInstanceDisks(self, instance)
5982
      else:
5983
        self.LogInfo("Instance %s was already stopped, starting now",
5984
                     instance.name)
5985
      _StartInstanceDisks(self, instance, ignore_secondaries)
5986
      result = self.rpc.call_instance_start(node_current, instance,
5987
                                            None, None, False)
5988
      msg = result.fail_msg
5989
      if msg:
5990
        _ShutdownInstanceDisks(self, instance)
5991
        raise errors.OpExecError("Could not start instance for"
5992
                                 " full reboot: %s" % msg)
5993

    
5994
    self.cfg.MarkInstanceUp(instance.name)
5995

    
5996

    
5997
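# Illustrative sketch, not part of the original module: the reboot type
# decides which path LUInstanceReboot.Exec above takes, e.g.
#
#   opcodes.OpInstanceReboot(instance_name="inst1.example.com",
#                            reboot_type=constants.INSTANCE_REBOOT_HARD)
#
# Soft and hard reboots of a running instance are delegated to the hypervisor
# via call_instance_reboot, while any other case falls back to a full stop
# and start driven from the master side.

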
class LUInstanceShutdown(LogicalUnit):
5998
  """Shutdown an instance.
5999

6000
  """
6001
  HPATH = "instance-stop"
6002
  HTYPE = constants.HTYPE_INSTANCE
6003
  REQ_BGL = False
6004

    
6005
  def ExpandNames(self):
6006
    self._ExpandAndLockInstance()
6007

    
6008
  def BuildHooksEnv(self):
6009
    """Build hooks env.
6010

6011
    This runs on master, primary and secondary nodes of the instance.
6012

6013
    """
6014
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6015
    env["TIMEOUT"] = self.op.timeout
6016
    return env
6017

    
6018
  def BuildHooksNodes(self):
6019
    """Build hooks nodes.
6020

6021
    """
6022
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6023
    return (nl, nl)
6024

    
6025
  def CheckPrereq(self):
6026
    """Check prerequisites.
6027

6028
    This checks that the instance is in the cluster.
6029

6030
    """
6031
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6032
    assert self.instance is not None, \
6033
      "Cannot retrieve locked instance %s" % self.op.instance_name
6034

    
6035
    self.primary_offline = \
6036
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6037

    
6038
    if self.primary_offline and self.op.ignore_offline_nodes:
6039
      self.proc.LogWarning("Ignoring offline primary node")
6040
    else:
6041
      _CheckNodeOnline(self, self.instance.primary_node)
6042

    
6043
  def Exec(self, feedback_fn):
6044
    """Shutdown the instance.
6045

6046
    """
6047
    instance = self.instance
6048
    node_current = instance.primary_node
6049
    timeout = self.op.timeout
6050

    
6051
    if not self.op.no_remember:
6052
      self.cfg.MarkInstanceDown(instance.name)
6053

    
6054
    if self.primary_offline:
6055
      assert self.op.ignore_offline_nodes
6056
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6057
    else:
6058
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6059
      msg = result.fail_msg
6060
      if msg:
6061
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6062

    
6063
      _ShutdownInstanceDisks(self, instance)
6064

    
6065

    
6066
class LUInstanceReinstall(LogicalUnit):
6067
  """Reinstall an instance.
6068

6069
  """
6070
  HPATH = "instance-reinstall"
6071
  HTYPE = constants.HTYPE_INSTANCE
6072
  REQ_BGL = False
6073

    
6074
  def ExpandNames(self):
6075
    self._ExpandAndLockInstance()
6076

    
6077
  def BuildHooksEnv(self):
6078
    """Build hooks env.
6079

6080
    This runs on master, primary and secondary nodes of the instance.
6081

6082
    """
6083
    return _BuildInstanceHookEnvByObject(self, self.instance)
6084

    
6085
  def BuildHooksNodes(self):
6086
    """Build hooks nodes.
6087

6088
    """
6089
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6090
    return (nl, nl)
6091

    
6092
  def CheckPrereq(self):
6093
    """Check prerequisites.
6094

6095
    This checks that the instance is in the cluster and is not running.
6096

6097
    """
6098
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6099
    assert instance is not None, \
6100
      "Cannot retrieve locked instance %s" % self.op.instance_name
6101
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6102
                     " offline, cannot reinstall")
6103
    for node in instance.secondary_nodes:
6104
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6105
                       " cannot reinstall")
6106

    
6107
    if instance.disk_template == constants.DT_DISKLESS:
6108
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6109
                                 self.op.instance_name,
6110
                                 errors.ECODE_INVAL)
6111
    _CheckInstanceDown(self, instance, "cannot reinstall")
6112

    
6113
    if self.op.os_type is not None:
6114
      # OS verification
6115
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6116
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6117
      instance_os = self.op.os_type
6118
    else:
6119
      instance_os = instance.os
6120

    
6121
    nodelist = list(instance.all_nodes)
6122

    
6123
    if self.op.osparams:
6124
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6125
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6126
      self.os_inst = i_osdict # the new dict (without defaults)
6127
    else:
6128
      self.os_inst = None
6129

    
6130
    self.instance = instance
6131

    
6132
  def Exec(self, feedback_fn):
6133
    """Reinstall the instance.
6134

6135
    """
6136
    inst = self.instance
6137

    
6138
    if self.op.os_type is not None:
6139
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6140
      inst.os = self.op.os_type
6141
      # Write to configuration
6142
      self.cfg.Update(inst, feedback_fn)
6143

    
6144
    _StartInstanceDisks(self, inst, None)
6145
    try:
6146
      feedback_fn("Running the instance OS create scripts...")
6147
      # FIXME: pass debug option from opcode to backend
6148
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6149
                                             self.op.debug_level,
6150
                                             osparams=self.os_inst)
6151
      result.Raise("Could not install OS for instance %s on node %s" %
6152
                   (inst.name, inst.primary_node))
6153
    finally:
6154
      _ShutdownInstanceDisks(self, inst)
6155

    
6156

    
6157
class LUInstanceRecreateDisks(LogicalUnit):
6158
  """Recreate an instance's missing disks.
6159

6160
  """
6161
  HPATH = "instance-recreate-disks"
6162
  HTYPE = constants.HTYPE_INSTANCE
6163
  REQ_BGL = False
6164

    
6165
  def CheckArguments(self):
6166
    # normalise the disk list
6167
    self.op.disks = sorted(frozenset(self.op.disks))
6168

    
6169
  def ExpandNames(self):
6170
    self._ExpandAndLockInstance()
6171
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6172
    if self.op.nodes:
6173
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6174
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6175
    else:
6176
      self.needed_locks[locking.LEVEL_NODE] = []
6177

    
6178
  def DeclareLocks(self, level):
6179
    if level == locking.LEVEL_NODE:
6180
      # if we replace the nodes, we only need to lock the old primary,
6181
      # otherwise we need to lock all nodes for disk re-creation
6182
      primary_only = bool(self.op.nodes)
6183
      self._LockInstancesNodes(primary_only=primary_only)
6184

    
6185
  def BuildHooksEnv(self):
6186
    """Build hooks env.
6187

6188
    This runs on master, primary and secondary nodes of the instance.
6189

6190
    """
6191
    return _BuildInstanceHookEnvByObject(self, self.instance)
6192

    
6193
  def BuildHooksNodes(self):
6194
    """Build hooks nodes.
6195

6196
    """
6197
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6198
    return (nl, nl)
6199

    
6200
  def CheckPrereq(self):
6201
    """Check prerequisites.
6202

6203
    This checks that the instance is in the cluster and is not running.
6204

6205
    """
6206
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6207
    assert instance is not None, \
6208
      "Cannot retrieve locked instance %s" % self.op.instance_name
6209
    if self.op.nodes:
6210
      if len(self.op.nodes) != len(instance.all_nodes):
6211
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6212
                                   " %d replacement nodes were specified" %
6213
                                   (instance.name, len(instance.all_nodes),
6214
                                    len(self.op.nodes)),
6215
                                   errors.ECODE_INVAL)
6216
      assert instance.disk_template != constants.DT_DRBD8 or \
6217
          len(self.op.nodes) == 2
6218
      assert instance.disk_template != constants.DT_PLAIN or \
6219
          len(self.op.nodes) == 1
6220
      primary_node = self.op.nodes[0]
6221
    else:
6222
      primary_node = instance.primary_node
6223
    _CheckNodeOnline(self, primary_node)
6224

    
6225
    if instance.disk_template == constants.DT_DISKLESS:
6226
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6227
                                 self.op.instance_name, errors.ECODE_INVAL)
6228
    # if we replace nodes *and* the old primary is offline, we don't
6229
    # check
6230
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6231
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6232
    if not (self.op.nodes and old_pnode.offline):
6233
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6234

    
6235
    if not self.op.disks:
6236
      self.op.disks = range(len(instance.disks))
6237
    else:
6238
      for idx in self.op.disks:
6239
        if idx >= len(instance.disks):
6240
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6241
                                     errors.ECODE_INVAL)
6242
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6243
      raise errors.OpPrereqError("Can't recreate disks partially and"
6244
                                 " change the nodes at the same time",
6245
                                 errors.ECODE_INVAL)
6246
    self.instance = instance
6247

    
6248
  def Exec(self, feedback_fn):
6249
    """Recreate the disks.
6250

6251
    """
6252
    instance = self.instance
6253

    
6254
    to_skip = []
6255
    mods = [] # keeps track of needed logical_id changes
6256

    
6257
    for idx, disk in enumerate(instance.disks):
6258
      if idx not in self.op.disks: # disk idx has not been passed in
6259
        to_skip.append(idx)
6260
        continue
6261
      # update secondaries for disks, if needed
6262
      if self.op.nodes:
6263
        if disk.dev_type == constants.LD_DRBD8:
6264
          # need to update the nodes and minors
6265
          assert len(self.op.nodes) == 2
6266
          assert len(disk.logical_id) == 6 # otherwise disk internals
6267
                                           # have changed
6268
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6269
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6270
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6271
                    new_minors[0], new_minors[1], old_secret)
6272
          assert len(disk.logical_id) == len(new_id)
6273
          mods.append((idx, new_id))
6274

    
6275
    # now that we have passed all asserts above, we can apply the mods
6276
    # in a single run (to avoid partial changes)
6277
    for idx, new_id in mods:
6278
      instance.disks[idx].logical_id = new_id
6279

    
6280
    # change primary node, if needed
6281
    if self.op.nodes:
6282
      instance.primary_node = self.op.nodes[0]
6283
      self.LogWarning("Changing the instance's nodes, you will have to"
6284
                      " remove any disks left on the older nodes manually")
6285

    
6286
    if self.op.nodes:
6287
      self.cfg.Update(instance, feedback_fn)
6288

    
6289
    _CreateDisks(self, instance, to_skip=to_skip)
6290

    
6291

    
6292
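# Illustrative sketch, not part of the original module: recreating all disks
# of a DRBD instance on a new node pair, as checked by
# LUInstanceRecreateDisks.CheckPrereq above (node names are made up):
#
#   opcodes.OpInstanceRecreateDisks(instance_name="inst1.example.com",
#                                   nodes=["node3.example.com",
#                                          "node4.example.com"])
#
# Passing an explicit disks list together with nodes is rejected, since disks
# cannot be recreated partially while also changing the nodes.

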
class LUInstanceRename(LogicalUnit):
6293
  """Rename an instance.
6294

6295
  """
6296
  HPATH = "instance-rename"
6297
  HTYPE = constants.HTYPE_INSTANCE
6298

    
6299
  def CheckArguments(self):
6300
    """Check arguments.
6301

6302
    """
6303
    if self.op.ip_check and not self.op.name_check:
6304
      # TODO: make the ip check more flexible and not depend on the name check
6305
      raise errors.OpPrereqError("IP address check requires a name check",
6306
                                 errors.ECODE_INVAL)
6307

    
6308
  def BuildHooksEnv(self):
6309
    """Build hooks env.
6310

6311
    This runs on master, primary and secondary nodes of the instance.
6312

6313
    """
6314
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6315
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6316
    return env
6317

    
6318
  def BuildHooksNodes(self):
6319
    """Build hooks nodes.
6320

6321
    """
6322
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6323
    return (nl, nl)
6324

    
6325
  def CheckPrereq(self):
6326
    """Check prerequisites.
6327

6328
    This checks that the instance is in the cluster and is not running.
6329

6330
    """
6331
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6332
                                                self.op.instance_name)
6333
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6334
    assert instance is not None
6335
    _CheckNodeOnline(self, instance.primary_node)
6336
    _CheckInstanceDown(self, instance, "cannot rename")
6337
    self.instance = instance
6338

    
6339
    new_name = self.op.new_name
6340
    if self.op.name_check:
6341
      hostname = netutils.GetHostname(name=new_name)
6342
      if hostname != new_name:
6343
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6344
                     hostname.name)
6345
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6346
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6347
                                    " same as given hostname '%s'") %
6348
                                    (hostname.name, self.op.new_name),
6349
                                    errors.ECODE_INVAL)
6350
      new_name = self.op.new_name = hostname.name
6351
      if (self.op.ip_check and
6352
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6353
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6354
                                   (hostname.ip, new_name),
6355
                                   errors.ECODE_NOTUNIQUE)
6356

    
6357
    instance_list = self.cfg.GetInstanceList()
6358
    if new_name in instance_list and new_name != instance.name:
6359
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6360
                                 new_name, errors.ECODE_EXISTS)
6361

    
6362
  def Exec(self, feedback_fn):
6363
    """Rename the instance.
6364

6365
    """
6366
    inst = self.instance
6367
    old_name = inst.name
6368

    
6369
    rename_file_storage = False
6370
    if (inst.disk_template in constants.DTS_FILEBASED and
6371
        self.op.new_name != inst.name):
6372
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6373
      rename_file_storage = True
6374

    
6375
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6376
    # Change the instance lock. This is definitely safe while we hold the BGL.
6377
    # Otherwise the new lock would have to be added in acquired mode.
6378
    assert self.REQ_BGL
6379
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6380
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6381

    
6382
    # re-read the instance from the configuration after rename
6383
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6384

    
6385
    if rename_file_storage:
6386
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6387
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6388
                                                     old_file_storage_dir,
6389
                                                     new_file_storage_dir)
6390
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6391
                   " (but the instance has been renamed in Ganeti)" %
6392
                   (inst.primary_node, old_file_storage_dir,
6393
                    new_file_storage_dir))
6394

    
6395
    _StartInstanceDisks(self, inst, None)
6396
    try:
6397
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6398
                                                 old_name, self.op.debug_level)
6399
      msg = result.fail_msg
6400
      if msg:
6401
        msg = ("Could not run OS rename script for instance %s on node %s"
6402
               " (but the instance has been renamed in Ganeti): %s" %
6403
               (inst.name, inst.primary_node, msg))
6404
        self.proc.LogWarning(msg)
6405
    finally:
6406
      _ShutdownInstanceDisks(self, inst)
6407

    
6408
    return inst.name
6409

    
6410

    
6411
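# Illustrative sketch, not part of the original module: as enforced in
# LUInstanceRename.CheckArguments above, ip_check is only accepted together
# with name_check:
#
#   opcodes.OpInstanceRename(instance_name="inst1.example.com",
#                            new_name="inst2.example.com",
#                            name_check=True, ip_check=True)
#
# With name_check=False the new name is taken verbatim and neither the DNS
# resolution nor the IP liveness check in CheckPrereq is performed.

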
class LUInstanceRemove(LogicalUnit):
6412
  """Remove an instance.
6413

6414
  """
6415
  HPATH = "instance-remove"
6416
  HTYPE = constants.HTYPE_INSTANCE
6417
  REQ_BGL = False
6418

    
6419
  def ExpandNames(self):
6420
    self._ExpandAndLockInstance()
6421
    self.needed_locks[locking.LEVEL_NODE] = []
6422
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6423

    
6424
  def DeclareLocks(self, level):
6425
    if level == locking.LEVEL_NODE:
6426
      self._LockInstancesNodes()
6427

    
6428
  def BuildHooksEnv(self):
6429
    """Build hooks env.
6430

6431
    This runs on master, primary and secondary nodes of the instance.
6432

6433
    """
6434
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6435
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6436
    return env
6437

    
6438
  def BuildHooksNodes(self):
6439
    """Build hooks nodes.
6440

6441
    """
6442
    nl = [self.cfg.GetMasterNode()]
6443
    nl_post = list(self.instance.all_nodes) + nl
6444
    return (nl, nl_post)
6445

    
6446
  def CheckPrereq(self):
6447
    """Check prerequisites.
6448

6449
    This checks that the instance is in the cluster.
6450

6451
    """
6452
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6453
    assert self.instance is not None, \
6454
      "Cannot retrieve locked instance %s" % self.op.instance_name
6455

    
6456
  def Exec(self, feedback_fn):
6457
    """Remove the instance.
6458

6459
    """
6460
    instance = self.instance
6461
    logging.info("Shutting down instance %s on node %s",
6462
                 instance.name, instance.primary_node)
6463

    
6464
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6465
                                             self.op.shutdown_timeout)
6466
    msg = result.fail_msg
6467
    if msg:
6468
      if self.op.ignore_failures:
6469
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6470
      else:
6471
        raise errors.OpExecError("Could not shutdown instance %s on"
6472
                                 " node %s: %s" %
6473
                                 (instance.name, instance.primary_node, msg))
6474

    
6475
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6476

    
6477

    
6478
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6479
  """Utility function to remove an instance.
6480

6481
  """
6482
  logging.info("Removing block devices for instance %s", instance.name)
6483

    
6484
  if not _RemoveDisks(lu, instance):
6485
    if not ignore_failures:
6486
      raise errors.OpExecError("Can't remove instance's disks")
6487
    feedback_fn("Warning: can't remove instance's disks")
6488

    
6489
  logging.info("Removing instance %s out of cluster config", instance.name)
6490

    
6491
  lu.cfg.RemoveInstance(instance.name)
6492

    
6493
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6494
    "Instance lock removal conflict"
6495

    
6496
  # Remove lock for the instance
6497
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6498

    
6499

    
6500
class LUInstanceQuery(NoHooksLU):
6501
  """Logical unit for querying instances.
6502

6503
  """
6504
  # pylint: disable-msg=W0142
6505
  REQ_BGL = False
6506

    
6507
  def CheckArguments(self):
6508
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6509
                             self.op.output_fields, self.op.use_locking)
6510

    
6511
  def ExpandNames(self):
6512
    self.iq.ExpandNames(self)
6513

    
6514
  def DeclareLocks(self, level):
6515
    self.iq.DeclareLocks(self, level)
6516

    
6517
  def Exec(self, feedback_fn):
6518
    return self.iq.OldStyleQuery(self)
6519

    
6520

    
6521
class LUInstanceFailover(LogicalUnit):
6522
  """Failover an instance.
6523

6524
  """
6525
  HPATH = "instance-failover"
6526
  HTYPE = constants.HTYPE_INSTANCE
6527
  REQ_BGL = False
6528

    
6529
  def CheckArguments(self):
6530
    """Check the arguments.
6531

6532
    """
6533
    self.iallocator = getattr(self.op, "iallocator", None)
6534
    self.target_node = getattr(self.op, "target_node", None)
6535

    
6536
  def ExpandNames(self):
6537
    self._ExpandAndLockInstance()
6538

    
6539
    if self.op.target_node is not None:
6540
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6541

    
6542
    self.needed_locks[locking.LEVEL_NODE] = []
6543
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6544

    
6545
    ignore_consistency = self.op.ignore_consistency
6546
    shutdown_timeout = self.op.shutdown_timeout
6547
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6548
                                       cleanup=False,
6549
                                       failover=True,
6550
                                       ignore_consistency=ignore_consistency,
6551
                                       shutdown_timeout=shutdown_timeout)
6552
    self.tasklets = [self._migrater]
6553

    
6554
  def DeclareLocks(self, level):
6555
    if level == locking.LEVEL_NODE:
6556
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6557
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6558
        if self.op.target_node is None:
6559
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6560
        else:
6561
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6562
                                                   self.op.target_node]
6563
        del self.recalculate_locks[locking.LEVEL_NODE]
6564
      else:
6565
        self._LockInstancesNodes()
6566

    
6567
  def BuildHooksEnv(self):
6568
    """Build hooks env.
6569

6570
    This runs on master, primary and secondary nodes of the instance.
6571

6572
    """
6573
    instance = self._migrater.instance
6574
    source_node = instance.primary_node
6575
    target_node = self.op.target_node
6576
    env = {
6577
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6578
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6579
      "OLD_PRIMARY": source_node,
6580
      "NEW_PRIMARY": target_node,
6581
      }
6582

    
6583
    if instance.disk_template in constants.DTS_INT_MIRROR:
6584
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6585
      env["NEW_SECONDARY"] = source_node
6586
    else:
6587
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6588

    
6589
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6590

    
6591
    return env
6592

    
6593
  def BuildHooksNodes(self):
6594
    """Build hooks nodes.
6595

6596
    """
6597
    instance = self._migrater.instance
6598
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6599
    return (nl, nl + [instance.primary_node])
6600

    
6601

    
6602
class LUInstanceMigrate(LogicalUnit):
6603
  """Migrate an instance.
6604

6605
  This is migration without shutting down, compared to the failover,
6606
  which is done with shutdown.
6607

6608
  """
6609
  HPATH = "instance-migrate"
6610
  HTYPE = constants.HTYPE_INSTANCE
6611
  REQ_BGL = False
6612

    
6613
  def ExpandNames(self):
6614
    self._ExpandAndLockInstance()
6615

    
6616
    if self.op.target_node is not None:
6617
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6618

    
6619
    self.needed_locks[locking.LEVEL_NODE] = []
6620
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6621

    
6622
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6623
                                       cleanup=self.op.cleanup,
6624
                                       failover=False,
6625
                                       fallback=self.op.allow_failover)
6626
    self.tasklets = [self._migrater]
6627

    
6628
  def DeclareLocks(self, level):
6629
    if level == locking.LEVEL_NODE:
6630
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6631
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6632
        if self.op.target_node is None:
6633
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6634
        else:
6635
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6636
                                                   self.op.target_node]
6637
        del self.recalculate_locks[locking.LEVEL_NODE]
6638
      else:
6639
        self._LockInstancesNodes()
6640

    
6641
  def BuildHooksEnv(self):
6642
    """Build hooks env.
6643

6644
    This runs on master, primary and secondary nodes of the instance.
6645

6646
    """
6647
    instance = self._migrater.instance
6648
    source_node = instance.primary_node
6649
    target_node = self.op.target_node
6650
    env = _BuildInstanceHookEnvByObject(self, instance)
6651
    env.update({
6652
      "MIGRATE_LIVE": self._migrater.live,
6653
      "MIGRATE_CLEANUP": self.op.cleanup,
6654
      "OLD_PRIMARY": source_node,
6655
      "NEW_PRIMARY": target_node,
6656
      })
6657

    
6658
    if instance.disk_template in constants.DTS_INT_MIRROR:
6659
      env["OLD_SECONDARY"] = target_node
6660
      env["NEW_SECONDARY"] = source_node
6661
    else:
6662
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6663

    
6664
    return env
6665

    
6666
  def BuildHooksNodes(self):
6667
    """Build hooks nodes.
6668

6669
    """
6670
    instance = self._migrater.instance
6671
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6672
    return (nl, nl + [instance.primary_node])
6673

    
6674

    
6675
class LUInstanceMove(LogicalUnit):
6676
  """Move an instance by data-copying.
6677

6678
  """
6679
  HPATH = "instance-move"
6680
  HTYPE = constants.HTYPE_INSTANCE
6681
  REQ_BGL = False
6682

    
6683
  def ExpandNames(self):
6684
    self._ExpandAndLockInstance()
6685
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6686
    self.op.target_node = target_node
6687
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6688
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6689

    
6690
  def DeclareLocks(self, level):
6691
    if level == locking.LEVEL_NODE:
6692
      self._LockInstancesNodes(primary_only=True)
6693

    
6694
  def BuildHooksEnv(self):
6695
    """Build hooks env.
6696

6697
    This runs on master, primary and secondary nodes of the instance.
6698

6699
    """
6700
    env = {
6701
      "TARGET_NODE": self.op.target_node,
6702
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6703
      }
6704
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6705
    return env
6706

    
6707
  def BuildHooksNodes(self):
6708
    """Build hooks nodes.
6709

6710
    """
6711
    nl = [
6712
      self.cfg.GetMasterNode(),
6713
      self.instance.primary_node,
6714
      self.op.target_node,
6715
      ]
6716
    return (nl, nl)
6717

    
6718
  def CheckPrereq(self):
6719
    """Check prerequisites.
6720

6721
    This checks that the instance is in the cluster.
6722

6723
    """
6724
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6725
    assert self.instance is not None, \
6726
      "Cannot retrieve locked instance %s" % self.op.instance_name
6727

    
6728
    node = self.cfg.GetNodeInfo(self.op.target_node)
6729
    assert node is not None, \
6730
      "Cannot retrieve locked node %s" % self.op.target_node
6731

    
6732
    self.target_node = target_node = node.name
6733

    
6734
    if target_node == instance.primary_node:
6735
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6736
                                 (instance.name, target_node),
6737
                                 errors.ECODE_STATE)
6738

    
6739
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6740

    
6741
    for idx, dsk in enumerate(instance.disks):
6742
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6743
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6744
                                   " cannot copy" % idx, errors.ECODE_STATE)
6745

    
6746
    _CheckNodeOnline(self, target_node)
6747
    _CheckNodeNotDrained(self, target_node)
6748
    _CheckNodeVmCapable(self, target_node)
6749

    
6750
    if instance.admin_up:
6751
      # check memory requirements on the secondary node
6752
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6753
                           instance.name, bep[constants.BE_MEMORY],
6754
                           instance.hypervisor)
6755
    else:
6756
      self.LogInfo("Not checking memory on the secondary node as"
6757
                   " instance will not be started")
6758

    
6759
    # check bridge existence
6760
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6761

    
6762
  def Exec(self, feedback_fn):
6763
    """Move an instance.
6764

6765
    The move is done by shutting it down on its present node, copying
6766
    the data over (slow) and starting it on the new node.
6767

6768
    """
6769
    instance = self.instance
6770

    
6771
    source_node = instance.primary_node
6772
    target_node = self.target_node
6773

    
6774
    self.LogInfo("Shutting down instance %s on source node %s",
6775
                 instance.name, source_node)
6776

    
6777
    result = self.rpc.call_instance_shutdown(source_node, instance,
6778
                                             self.op.shutdown_timeout)
6779
    msg = result.fail_msg
6780
    if msg:
6781
      if self.op.ignore_consistency:
6782
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6783
                             " Proceeding anyway. Please make sure node"
6784
                             " %s is down. Error details: %s",
6785
                             instance.name, source_node, source_node, msg)
6786
      else:
6787
        raise errors.OpExecError("Could not shutdown instance %s on"
6788
                                 " node %s: %s" %
6789
                                 (instance.name, source_node, msg))
6790

    
6791
    # create the target disks
6792
    try:
6793
      _CreateDisks(self, instance, target_node=target_node)
6794
    except errors.OpExecError:
6795
      self.LogWarning("Device creation failed, reverting...")
6796
      try:
6797
        _RemoveDisks(self, instance, target_node=target_node)
6798
      finally:
6799
        self.cfg.ReleaseDRBDMinors(instance.name)
6800
        raise
6801

    
6802
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6803

    
6804
    errs = []
6805
    # activate, get path, copy the data over
6806
    for idx, disk in enumerate(instance.disks):
6807
      self.LogInfo("Copying data for disk %d", idx)
6808
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6809
                                               instance.name, True, idx)
6810
      if result.fail_msg:
6811
        self.LogWarning("Can't assemble newly created disk %d: %s",
6812
                        idx, result.fail_msg)
6813
        errs.append(result.fail_msg)
6814
        break
6815
      dev_path = result.payload
6816
      result = self.rpc.call_blockdev_export(source_node, disk,
6817
                                             target_node, dev_path,
6818
                                             cluster_name)
6819
      if result.fail_msg:
6820
        self.LogWarning("Can't copy data over for disk %d: %s",
6821
                        idx, result.fail_msg)
6822
        errs.append(result.fail_msg)
6823
        break
6824

    
6825
    if errs:
6826
      self.LogWarning("Some disks failed to copy, aborting")
6827
      try:
6828
        _RemoveDisks(self, instance, target_node=target_node)
6829
      finally:
6830
        self.cfg.ReleaseDRBDMinors(instance.name)
6831
        raise errors.OpExecError("Errors during disk copy: %s" %
6832
                                 (",".join(errs),))
6833

    
6834
    instance.primary_node = target_node
6835
    self.cfg.Update(instance, feedback_fn)
6836

    
6837
    self.LogInfo("Removing the disks on the original node")
6838
    _RemoveDisks(self, instance, target_node=source_node)
6839

    
6840
    # Only start the instance if it's marked as up
6841
    if instance.admin_up:
6842
      self.LogInfo("Starting instance %s on node %s",
6843
                   instance.name, target_node)
6844

    
6845
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6846
                                           ignore_secondaries=True)
6847
      if not disks_ok:
6848
        _ShutdownInstanceDisks(self, instance)
6849
        raise errors.OpExecError("Can't activate the instance's disks")
6850

    
6851
      result = self.rpc.call_instance_start(target_node, instance,
6852
                                            None, None, False)
6853
      msg = result.fail_msg
6854
      if msg:
6855
        _ShutdownInstanceDisks(self, instance)
6856
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6857
                                 (instance.name, target_node, msg))
6858

    
6859

    
6860
class LUNodeMigrate(LogicalUnit):
6861
  """Migrate all instances from a node.
6862

6863
  """
6864
  HPATH = "node-migrate"
6865
  HTYPE = constants.HTYPE_NODE
6866
  REQ_BGL = False
6867

    
6868
  def CheckArguments(self):
6869
    pass
6870

    
6871
  def ExpandNames(self):
6872
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6873

    
6874
    self.share_locks = _ShareAll()
6875
    self.needed_locks = {
6876
      locking.LEVEL_NODE: [self.op.node_name],
6877
      }
6878

    
6879
  def BuildHooksEnv(self):
6880
    """Build hooks env.
6881

6882
    This runs on the master, the primary and all the secondaries.
6883

6884
    """
6885
    return {
6886
      "NODE_NAME": self.op.node_name,
6887
      }
6888

    
6889
  def BuildHooksNodes(self):
6890
    """Build hooks nodes.
6891

6892
    """
6893
    nl = [self.cfg.GetMasterNode()]
6894
    return (nl, nl)
6895

    
6896
  def CheckPrereq(self):
6897
    pass
6898

    
6899
  def Exec(self, feedback_fn):
6900
    # Prepare jobs for migration instances
6901
    jobs = [
6902
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6903
                                 mode=self.op.mode,
6904
                                 live=self.op.live,
6905
                                 iallocator=self.op.iallocator,
6906
                                 target_node=self.op.target_node)]
6907
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6908
      ]
6909

    
6910
    # TODO: Run iallocator in this opcode and pass correct placement options to
6911
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6912
    # running the iallocator and the actual migration, a good consistency model
6913
    # will have to be found.
6914

    
6915
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6916
            frozenset([self.op.node_name]))
6917

    
6918
    return ResultWithJobs(jobs)
6919

    
6920

    
6921
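# Illustrative sketch, not part of the original module: for a node whose
# primary instances are "inst1" and "inst2", LUNodeMigrate.Exec above returns
# roughly
#
#   ResultWithJobs([[opcodes.OpInstanceMigrate(instance_name="inst1", ...)],
#                   [opcodes.OpInstanceMigrate(instance_name="inst2", ...)]])
#
# i.e. one single-opcode job per instance rather than one big job, so each
# migration is handled separately.

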
class TLMigrateInstance(Tasklet):
6922
  """Tasklet class for instance migration.
6923

6924
  @type live: boolean
6925
  @ivar live: whether the migration will be done live or non-live;
6926
      this variable is initialized only after CheckPrereq has run
6927
  @type cleanup: boolean
6928
  @ivar cleanup: Whether we clean up from a failed migration
6929
  @type iallocator: string
6930
  @ivar iallocator: The iallocator used to determine target_node
6931
  @type target_node: string
6932
  @ivar target_node: If given, the target_node to reallocate the instance to
6933
  @type failover: boolean
6934
  @ivar failover: Whether operation results in failover or migration
6935
  @type fallback: boolean
6936
  @ivar fallback: Whether fallback to failover is allowed if migration is not
6937
                  possible
6938
  @type ignore_consistency: boolean
6939
  @ivar ignore_consistency: Whether to ignore consistency between source
6940
                            and target node
6941
  @type shutdown_timeout: int
6942
  @ivar shutdown_timeout: In case of failover, timeout of the shutdown
6943

6944
  """
6945
  def __init__(self, lu, instance_name, cleanup=False,
6946
               failover=False, fallback=False,
6947
               ignore_consistency=False,
6948
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6949
    """Initializes this class.
6950

6951
    """
6952
    Tasklet.__init__(self, lu)
6953

    
6954
    # Parameters
6955
    self.instance_name = instance_name
6956
    self.cleanup = cleanup
6957
    self.live = False # will be overridden later
6958
    self.failover = failover
6959
    self.fallback = fallback
6960
    self.ignore_consistency = ignore_consistency
6961
    self.shutdown_timeout = shutdown_timeout
6962

    
6963
  def CheckPrereq(self):
6964
    """Check prerequisites.
6965

6966
    This checks that the instance is in the cluster.
6967

6968
    """
6969
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6970
    instance = self.cfg.GetInstanceInfo(instance_name)
6971
    assert instance is not None
6972
    self.instance = instance
6973

    
6974
    if (not self.cleanup and not instance.admin_up and not self.failover and
6975
        self.fallback):
6976
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6977
                      " to failover")
6978
      self.failover = True
6979

    
6980
    if instance.disk_template not in constants.DTS_MIRRORED:
6981
      if self.failover:
6982
        text = "failovers"
6983
      else:
6984
        text = "migrations"
6985
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6986
                                 " %s" % (instance.disk_template, text),
6987
                                 errors.ECODE_STATE)
6988

    
6989
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6990
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6991

    
6992
      if self.lu.op.iallocator:
6993
        self._RunAllocator()
6994
      else:
6995
        # We set self.target_node as it is required by
6996
        # BuildHooksEnv
6997
        self.target_node = self.lu.op.target_node
6998

    
6999
      # self.target_node is already populated, either directly or by the
7000
      # iallocator run
7001
      target_node = self.target_node
7002
      if self.target_node == instance.primary_node:
7003
        raise errors.OpPrereqError("Cannot migrate instance %s"
7004
                                   " to its primary (%s)" %
7005
                                   (instance.name, instance.primary_node))
7006

    
7007
      if len(self.lu.tasklets) == 1:
7008
        # It is safe to release locks only when we're the only tasklet
7009
        # in the LU
7010
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7011
                      keep=[instance.primary_node, self.target_node])
7012

    
7013
    else:
7014
      secondary_nodes = instance.secondary_nodes
7015
      if not secondary_nodes:
7016
        raise errors.ConfigurationError("No secondary node but using"
7017
                                        " %s disk template" %
7018
                                        instance.disk_template)
7019
      target_node = secondary_nodes[0]
7020
      if self.lu.op.iallocator or (self.lu.op.target_node and
7021
                                   self.lu.op.target_node != target_node):
7022
        if self.failover:
7023
          text = "failed over"
7024
        else:
7025
          text = "migrated"
7026
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7027
                                   " be %s to arbitrary nodes"
7028
                                   " (neither an iallocator nor a target"
7029
                                   " node can be passed)" %
7030
                                   (instance.disk_template, text),
7031
                                   errors.ECODE_INVAL)
7032

    
7033
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7034

    
7035
    # check memory requirements on the secondary node
7036
    if not self.failover or instance.admin_up:
7037
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7038
                           instance.name, i_be[constants.BE_MEMORY],
7039
                           instance.hypervisor)
7040
    else:
7041
      self.lu.LogInfo("Not checking memory on the secondary node as"
7042
                      " instance will not be started")
7043

    
7044
    # check bridge existence
7045
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7046

    
7047
    if not self.cleanup:
7048
      _CheckNodeNotDrained(self.lu, target_node)
7049
      if not self.failover:
7050
        result = self.rpc.call_instance_migratable(instance.primary_node,
7051
                                                   instance)
7052
        if result.fail_msg and self.fallback:
7053
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7054
                          " failover")
7055
          self.failover = True
7056
        else:
7057
          result.Raise("Can't migrate, please use failover",
7058
                       prereq=True, ecode=errors.ECODE_STATE)
7059

    
7060
    assert not (self.failover and self.cleanup)
7061

    
7062
    if not self.failover:
7063
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7064
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7065
                                   " parameters are accepted",
7066
                                   errors.ECODE_INVAL)
7067
      if self.lu.op.live is not None:
7068
        if self.lu.op.live:
7069
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7070
        else:
7071
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7072
        # reset the 'live' parameter to None so that repeated
7073
        # invocations of CheckPrereq do not raise an exception
7074
        self.lu.op.live = None
7075
      elif self.lu.op.mode is None:
7076
        # read the default value from the hypervisor
7077
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7078
                                                skip_globals=False)
7079
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7080

    
7081
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7082
    else:
7083
      # Failover is never live
7084
      self.live = False
7085

    
7086
  def _RunAllocator(self):
7087
    """Run the allocator based on input opcode.
7088

7089
    """
7090
    ial = IAllocator(self.cfg, self.rpc,
7091
                     mode=constants.IALLOCATOR_MODE_RELOC,
7092
                     name=self.instance_name,
7093
                     # TODO See why hail breaks with a single node below
7094
                     relocate_from=[self.instance.primary_node,
7095
                                    self.instance.primary_node],
7096
                     )
7097

    
7098
    ial.Run(self.lu.op.iallocator)
7099

    
7100
    if not ial.success:
7101
      raise errors.OpPrereqError("Can't compute nodes using"
7102
                                 " iallocator '%s': %s" %
7103
                                 (self.lu.op.iallocator, ial.info),
7104
                                 errors.ECODE_NORES)
7105
    if len(ial.result) != ial.required_nodes:
7106
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7107
                                 " of nodes (%s), required %s" %
7108
                                 (self.lu.op.iallocator, len(ial.result),
7109
                                  ial.required_nodes), errors.ECODE_FAULT)
7110
    self.target_node = ial.result[0]
7111
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7112
                 self.instance_name, self.lu.op.iallocator,
7113
                 utils.CommaJoin(ial.result))
7114

    
7115
  def _WaitUntilSync(self):
7116
    """Poll with custom rpc for disk sync.
7117

7118
    This uses our own step-based rpc call.
7119

7120
    """
7121
    self.feedback_fn("* wait until resync is done")
7122
    all_done = False
7123
    while not all_done:
7124
      all_done = True
7125
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7126
                                            self.nodes_ip,
7127
                                            self.instance.disks)
7128
      min_percent = 100
7129
      for node, nres in result.items():
7130
        nres.Raise("Cannot resync disks on node %s" % node)
7131
        node_done, node_percent = nres.payload
7132
        all_done = all_done and node_done
7133
        if node_percent is not None:
7134
          min_percent = min(min_percent, node_percent)
7135
      if not all_done:
7136
        if min_percent < 100:
7137
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7138
        time.sleep(2)
7139

    
7140
  def _EnsureSecondary(self, node):
7141
    """Demote a node to secondary.
7142

7143
    """
7144
    self.feedback_fn("* switching node %s to secondary mode" % node)
7145

    
7146
    for dev in self.instance.disks:
7147
      self.cfg.SetDiskID(dev, node)
7148

    
7149
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7150
                                          self.instance.disks)
7151
    result.Raise("Cannot change disk to secondary on node %s" % node)
7152

    
7153
  def _GoStandalone(self):
7154
    """Disconnect from the network.
7155

7156
    """
7157
    self.feedback_fn("* changing into standalone mode")
7158
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7159
                                               self.instance.disks)
7160
    for node, nres in result.items():
7161
      nres.Raise("Cannot disconnect disks node %s" % node)
7162

    
7163
  def _GoReconnect(self, multimaster):
7164
    """Reconnect to the network.
7165

7166
    """
7167
    if multimaster:
7168
      msg = "dual-master"
7169
    else:
7170
      msg = "single-master"
7171
    self.feedback_fn("* changing disks into %s mode" % msg)
7172
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7173
                                           self.instance.disks,
7174
                                           self.instance.name, multimaster)
7175
    for node, nres in result.items():
7176
      nres.Raise("Cannot change disks config on node %s" % node)
7177

    
7178
  def _ExecCleanup(self):
7179
    """Try to cleanup after a failed migration.
7180

7181
    The cleanup is done by:
7182
      - check that the instance is running only on one node
7183
        (and update the config if needed)
7184
      - change disks on its secondary node to secondary
7185
      - wait until disks are fully synchronized
7186
      - disconnect from the network
7187
      - change disks into single-master mode
7188
      - wait again until disks are fully synchronized
7189

7190
    """
7191
    instance = self.instance
7192
    target_node = self.target_node
7193
    source_node = self.source_node
7194

    
7195
    # check running on only one node
7196
    self.feedback_fn("* checking where the instance actually runs"
7197
                     " (if this hangs, the hypervisor might be in"
7198
                     " a bad state)")
7199
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7200
    for node, result in ins_l.items():
7201
      result.Raise("Can't contact node %s" % node)
7202

    
7203
    runningon_source = instance.name in ins_l[source_node].payload
7204
    runningon_target = instance.name in ins_l[target_node].payload
7205

    
7206
    if runningon_source and runningon_target:
7207
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7208
                               " or the hypervisor is confused; you will have"
7209
                               " to ensure manually that it runs only on one"
7210
                               " and restart this operation")
7211

    
7212
    if not (runningon_source or runningon_target):
7213
      raise errors.OpExecError("Instance does not seem to be running at all;"
7214
                               " in this case it's safer to repair by"
7215
                               " running 'gnt-instance stop' to ensure disk"
7216
                               " shutdown, and then restarting it")
7217

    
7218
    if runningon_target:
7219
      # the migration has actually succeeded, we need to update the config
7220
      self.feedback_fn("* instance running on secondary node (%s),"
7221
                       " updating config" % target_node)
7222
      instance.primary_node = target_node
7223
      self.cfg.Update(instance, self.feedback_fn)
7224
      demoted_node = source_node
7225
    else:
7226
      self.feedback_fn("* instance confirmed to be running on its"
7227
                       " primary node (%s)" % source_node)
7228
      demoted_node = target_node
7229

    
7230
    if instance.disk_template in constants.DTS_INT_MIRROR:
7231
      self._EnsureSecondary(demoted_node)
7232
      try:
7233
        self._WaitUntilSync()
7234
      except errors.OpExecError:
7235
        # we ignore errors here, since if the device is standalone, it
7236
        # won't be able to sync
7237
        pass
7238
      self._GoStandalone()
7239
      self._GoReconnect(False)
7240
      self._WaitUntilSync()
7241

    
7242
    self.feedback_fn("* done")
7243

    
7244
  def _RevertDiskStatus(self):
7245
    """Try to revert the disk status after a failed migration.
7246

7247
    """
7248
    target_node = self.target_node
7249
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7250
      return
7251

    
7252
    try:
7253
      self._EnsureSecondary(target_node)
7254
      self._GoStandalone()
7255
      self._GoReconnect(False)
7256
      self._WaitUntilSync()
7257
    except errors.OpExecError, err:
7258
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7259
                         " please try to recover the instance manually;"
7260
                         " error '%s'" % str(err))
7261

    
7262
  def _AbortMigration(self):
7263
    """Call the hypervisor code to abort a started migration.
7264

7265
    """
7266
    instance = self.instance
7267
    target_node = self.target_node
7268
    migration_info = self.migration_info
7269

    
7270
    abort_result = self.rpc.call_finalize_migration(target_node,
7271
                                                    instance,
7272
                                                    migration_info,
7273
                                                    False)
7274
    abort_msg = abort_result.fail_msg
7275
    if abort_msg:
7276
      logging.error("Aborting migration failed on target node %s: %s",
7277
                    target_node, abort_msg)
7278
      # Don't raise an exception here, as we still have to try to revert the
7279
      # disk status, even if this step failed.
7280

    
7281
  def _ExecMigration(self):
7282
    """Migrate an instance.
7283

7284
    The migrate is done by:
7285
      - change the disks into dual-master mode
7286
      - wait until disks are fully synchronized again
7287
      - migrate the instance
7288
      - change disks on the new secondary node (the old primary) to secondary
7289
      - wait until disks are fully synchronized
7290
      - change disks into single-master mode
7291

7292
    """
7293
    instance = self.instance
7294
    target_node = self.target_node
7295
    source_node = self.source_node
7296

    
7297
    self.feedback_fn("* checking disk consistency between source and target")
7298
    for dev in instance.disks:
7299
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7300
        raise errors.OpExecError("Disk %s is degraded or not fully"
7301
                                 " synchronized on target node,"
7302
                                 " aborting migration" % dev.iv_name)
7303

    
7304
    # First get the migration information from the remote node
7305
    result = self.rpc.call_migration_info(source_node, instance)
7306
    msg = result.fail_msg
7307
    if msg:
7308
      log_err = ("Failed fetching source migration information from %s: %s" %
7309
                 (source_node, msg))
7310
      logging.error(log_err)
7311
      raise errors.OpExecError(log_err)
7312

    
7313
    self.migration_info = migration_info = result.payload
7314

    
7315
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7316
      # Then switch the disks to master/master mode
7317
      self._EnsureSecondary(target_node)
7318
      self._GoStandalone()
7319
      self._GoReconnect(True)
7320
      self._WaitUntilSync()
7321

    
7322
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7323
    result = self.rpc.call_accept_instance(target_node,
7324
                                           instance,
7325
                                           migration_info,
7326
                                           self.nodes_ip[target_node])
7327

    
7328
    msg = result.fail_msg
7329
    if msg:
7330
      logging.error("Instance pre-migration failed, trying to revert"
7331
                    " disk status: %s", msg)
7332
      self.feedback_fn("Pre-migration failed, aborting")
7333
      self._AbortMigration()
7334
      self._RevertDiskStatus()
7335
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7336
                               (instance.name, msg))
7337

    
7338
    self.feedback_fn("* migrating instance to %s" % target_node)
7339
    result = self.rpc.call_instance_migrate(source_node, instance,
7340
                                            self.nodes_ip[target_node],
7341
                                            self.live)
7342
    msg = result.fail_msg
7343
    if msg:
7344
      logging.error("Instance migration failed, trying to revert"
7345
                    " disk status: %s", msg)
7346
      self.feedback_fn("Migration failed, aborting")
7347
      self._AbortMigration()
7348
      self._RevertDiskStatus()
7349
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7350
                               (instance.name, msg))
7351

    
7352
    instance.primary_node = target_node
7353
    # distribute new instance config to the other nodes
7354
    self.cfg.Update(instance, self.feedback_fn)
7355

    
7356
    result = self.rpc.call_finalize_migration(target_node,
7357
                                              instance,
7358
                                              migration_info,
7359
                                              True)
7360
    msg = result.fail_msg
7361
    if msg:
7362
      logging.error("Instance migration succeeded, but finalization failed:"
7363
                    " %s", msg)
7364
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7365
                               msg)
7366

    
7367
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7368
      self._EnsureSecondary(source_node)
7369
      self._WaitUntilSync()
7370
      self._GoStandalone()
7371
      self._GoReconnect(False)
7372
      self._WaitUntilSync()
7373

    
7374
    self.feedback_fn("* done")
7375

    
7376
  def _ExecFailover(self):
7377
    """Failover an instance.
7378

7379
    The failover is done by shutting it down on its present node and
7380
    starting it on the secondary.
7381

7382
    """
7383
    instance = self.instance
7384
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7385

    
7386
    source_node = instance.primary_node
7387
    target_node = self.target_node
7388

    
7389
    if instance.admin_up:
7390
      self.feedback_fn("* checking disk consistency between source and target")
7391
      for dev in instance.disks:
7392
        # for drbd, these are drbd over lvm
7393
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7394
          if primary_node.offline:
7395
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7396
                             " target node %s" %
7397
                             (primary_node.name, dev.iv_name, target_node))
7398
          elif not self.ignore_consistency:
7399
            raise errors.OpExecError("Disk %s is degraded on target node,"
7400
                                     " aborting failover" % dev.iv_name)
7401
    else:
7402
      self.feedback_fn("* not checking disk consistency as instance is not"
7403
                       " running")
7404

    
7405
    self.feedback_fn("* shutting down instance on source node")
7406
    logging.info("Shutting down instance %s on node %s",
7407
                 instance.name, source_node)
7408

    
7409
    result = self.rpc.call_instance_shutdown(source_node, instance,
7410
                                             self.shutdown_timeout)
7411
    msg = result.fail_msg
7412
    if msg:
7413
      if self.ignore_consistency or primary_node.offline:
7414
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7415
                           " proceeding anyway; please make sure node"
7416
                           " %s is down; error details: %s",
7417
                           instance.name, source_node, source_node, msg)
7418
      else:
7419
        raise errors.OpExecError("Could not shutdown instance %s on"
7420
                                 " node %s: %s" %
7421
                                 (instance.name, source_node, msg))
7422

    
7423
    self.feedback_fn("* deactivating the instance's disks on source node")
7424
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7425
      raise errors.OpExecError("Can't shut down the instance's disks")
7426

    
7427
    instance.primary_node = target_node
7428
    # distribute new instance config to the other nodes
7429
    self.cfg.Update(instance, self.feedback_fn)
7430

    
7431
    # Only start the instance if it's marked as up
7432
    if instance.admin_up:
7433
      self.feedback_fn("* activating the instance's disks on target node %s" %
7434
                       target_node)
7435
      logging.info("Starting instance %s on node %s",
7436
                   instance.name, target_node)
7437

    
7438
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7439
                                           ignore_secondaries=True)
7440
      if not disks_ok:
7441
        _ShutdownInstanceDisks(self.lu, instance)
7442
        raise errors.OpExecError("Can't activate the instance's disks")
7443

    
7444
      self.feedback_fn("* starting the instance on the target node %s" %
7445
                       target_node)
7446
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7447
                                            False)
7448
      msg = result.fail_msg
7449
      if msg:
7450
        _ShutdownInstanceDisks(self.lu, instance)
7451
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7452
                                 (instance.name, target_node, msg))
7453

    
7454
  def Exec(self, feedback_fn):
7455
    """Perform the migration.
7456

7457
    """
7458
    self.feedback_fn = feedback_fn
7459
    self.source_node = self.instance.primary_node
7460

    
7461
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7462
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7463
      self.target_node = self.instance.secondary_nodes[0]
7464
      # Otherwise self.target_node has been populated either
7465
      # directly, or through an iallocator.
7466

    
7467
    self.all_nodes = [self.source_node, self.target_node]
7468
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7469
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7470

    
7471
    if self.failover:
7472
      feedback_fn("Failover instance %s" % self.instance.name)
7473
      self._ExecFailover()
7474
    else:
7475
      feedback_fn("Migrating instance %s" % self.instance.name)
7476

    
7477
      if self.cleanup:
7478
        return self._ExecCleanup()
7479
      else:
7480
        return self._ExecMigration()
7481

    
7482

    
7483
def _CreateBlockDev(lu, node, instance, device, force_create,
7484
                    info, force_open):
7485
  """Create a tree of block devices on a given node.
7486

7487
  If this device type has to be created on secondaries, create it and
7488
  all its children.
7489

7490
  If not, just recurse to children keeping the same 'force' value.
7491

7492
  @param lu: the lu on whose behalf we execute
7493
  @param node: the node on which to create the device
7494
  @type instance: L{objects.Instance}
7495
  @param instance: the instance which owns the device
7496
  @type device: L{objects.Disk}
7497
  @param device: the device to create
7498
  @type force_create: boolean
7499
  @param force_create: whether to force creation of this device; this
7500
      will be changed to True whenever we find a device which has
7501
      CreateOnSecondary() attribute
7502
  @param info: the extra 'metadata' we should attach to the device
7503
      (this will be represented as a LVM tag)
7504
  @type force_open: boolean
7505
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7509

7510
  """
7511
  if device.CreateOnSecondary():
7512
    force_create = True
7513

    
7514
  if device.children:
7515
    for child in device.children:
7516
      _CreateBlockDev(lu, node, instance, child, force_create,
7517
                      info, force_open)
7518

    
7519
  if not force_create:
7520
    return
7521

    
7522
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7523

    
7524

    
7525
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7526
  """Create a single block device on a given node.
7527

7528
  This will not recurse over children of the device, so they must be
7529
  created in advance.
7530

7531
  @param lu: the lu on whose behalf we execute
7532
  @param node: the node on which to create the device
7533
  @type instance: L{objects.Instance}
7534
  @param instance: the instance which owns the device
7535
  @type device: L{objects.Disk}
7536
  @param device: the device to create
7537
  @param info: the extra 'metadata' we should attach to the device
7538
      (this will be represented as a LVM tag)
7539
  @type force_open: boolean
7540
  @param force_open: this parameter will be passes to the
7541
      L{backend.BlockdevCreate} function where it specifies
7542
      whether we run on primary or not, and it affects both
7543
      the child assembly and the device own Open() execution
7544

7545
  """
7546
  lu.cfg.SetDiskID(device, node)
7547
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7548
                                       instance.name, force_open, info)
7549
  result.Raise("Can't create block device %s on"
7550
               " node %s for instance %s" % (device, node, instance.name))
7551
  if device.physical_id is None:
7552
    device.physical_id = result.payload
7553

    
7554

    
7555
def _GenerateUniqueNames(lu, exts):
7556
  """Generate a suitable LV name.
7557

7558
  This will generate a logical volume name for the given instance.
7559

7560
  """
7561
  results = []
7562
  for val in exts:
7563
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7564
    results.append("%s%s" % (new_id, val))
7565
  return results
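# Illustration only (not part of the original module): with
# exts=[".disk0", ".disk1"] this returns two names of the form
# "<uuid>.disk0" and "<uuid>.disk1", each <uuid> being a fresh unique ID
# obtained from the configuration for the current execution context.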


def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7569
                         iv_name, p_minor, s_minor):
7570
  """Generate a drbd8 device complete with its children.
7571

7572
  """
7573
  assert len(vgnames) == len(names) == 2
7574
  port = lu.cfg.AllocatePort()
7575
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7576
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7577
                          logical_id=(vgnames[0], names[0]))
7578
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7579
                          logical_id=(vgnames[1], names[1]))
7580
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7581
                          logical_id=(primary, secondary, port,
7582
                                      p_minor, s_minor,
7583
                                      shared_secret),
7584
                          children=[dev_data, dev_meta],
7585
                          iv_name=iv_name)
7586
  return drbd_dev
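# Illustration only: for a 1024 MiB disk the tree returned above looks
# roughly like
#
#   Disk(LD_DRBD8, size=1024,
#        logical_id=(primary, secondary, port, p_minor, s_minor, secret),
#        children=[Disk(LD_LV, size=1024, logical_id=(vgnames[0], names[0])),
#                  Disk(LD_LV, size=128, logical_id=(vgnames[1], names[1]))])
#
# i.e. a DRBD8 device backed by a data LV of the requested size plus a fixed
# 128 MiB metadata LV.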


def _GenerateDiskTemplate(lu, template_name,
7590
                          instance_name, primary_node,
7591
                          secondary_nodes, disk_info,
7592
                          file_storage_dir, file_driver,
7593
                          base_index, feedback_fn):
7594
  """Generate the entire disk layout for a given template type.
7595

7596
  """
7597
  #TODO: compute space requirements
7598

    
7599
  vgname = lu.cfg.GetVGName()
7600
  disk_count = len(disk_info)
7601
  disks = []
7602
  if template_name == constants.DT_DISKLESS:
7603
    pass
7604
  elif template_name == constants.DT_PLAIN:
7605
    if len(secondary_nodes) != 0:
7606
      raise errors.ProgrammerError("Wrong template configuration")
7607

    
7608
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7609
                                      for i in range(disk_count)])
7610
    for idx, disk in enumerate(disk_info):
7611
      disk_index = idx + base_index
7612
      vg = disk.get(constants.IDISK_VG, vgname)
7613
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7614
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7615
                              size=disk[constants.IDISK_SIZE],
7616
                              logical_id=(vg, names[idx]),
7617
                              iv_name="disk/%d" % disk_index,
7618
                              mode=disk[constants.IDISK_MODE])
7619
      disks.append(disk_dev)
7620
  elif template_name == constants.DT_DRBD8:
7621
    if len(secondary_nodes) != 1:
7622
      raise errors.ProgrammerError("Wrong template configuration")
7623
    remote_node = secondary_nodes[0]
7624
    minors = lu.cfg.AllocateDRBDMinor(
7625
      [primary_node, remote_node] * len(disk_info), instance_name)
7626

    
7627
    names = []
7628
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7629
                                               for i in range(disk_count)]):
7630
      names.append(lv_prefix + "_data")
7631
      names.append(lv_prefix + "_meta")
7632
    for idx, disk in enumerate(disk_info):
7633
      disk_index = idx + base_index
7634
      data_vg = disk.get(constants.IDISK_VG, vgname)
7635
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7636
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7637
                                      disk[constants.IDISK_SIZE],
7638
                                      [data_vg, meta_vg],
7639
                                      names[idx * 2:idx * 2 + 2],
7640
                                      "disk/%d" % disk_index,
7641
                                      minors[idx * 2], minors[idx * 2 + 1])
7642
      disk_dev.mode = disk[constants.IDISK_MODE]
7643
      disks.append(disk_dev)
7644
  elif template_name == constants.DT_FILE:
7645
    if len(secondary_nodes) != 0:
7646
      raise errors.ProgrammerError("Wrong template configuration")
7647

    
7648
    opcodes.RequireFileStorage()
7649

    
7650
    for idx, disk in enumerate(disk_info):
7651
      disk_index = idx + base_index
7652
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7653
                              size=disk[constants.IDISK_SIZE],
7654
                              iv_name="disk/%d" % disk_index,
7655
                              logical_id=(file_driver,
7656
                                          "%s/disk%d" % (file_storage_dir,
7657
                                                         disk_index)),
7658
                              mode=disk[constants.IDISK_MODE])
7659
      disks.append(disk_dev)
7660
  elif template_name == constants.DT_SHARED_FILE:
7661
    if len(secondary_nodes) != 0:
7662
      raise errors.ProgrammerError("Wrong template configuration")
7663

    
7664
    opcodes.RequireSharedFileStorage()
7665

    
7666
    for idx, disk in enumerate(disk_info):
7667
      disk_index = idx + base_index
7668
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7669
                              size=disk[constants.IDISK_SIZE],
7670
                              iv_name="disk/%d" % disk_index,
7671
                              logical_id=(file_driver,
7672
                                          "%s/disk%d" % (file_storage_dir,
7673
                                                         disk_index)),
7674
                              mode=disk[constants.IDISK_MODE])
7675
      disks.append(disk_dev)
7676
  elif template_name == constants.DT_BLOCK:
7677
    if len(secondary_nodes) != 0:
7678
      raise errors.ProgrammerError("Wrong template configuration")
7679

    
7680
    for idx, disk in enumerate(disk_info):
7681
      disk_index = idx + base_index
7682
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7683
                              size=disk[constants.IDISK_SIZE],
7684
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7685
                                          disk[constants.IDISK_ADOPT]),
7686
                              iv_name="disk/%d" % disk_index,
7687
                              mode=disk[constants.IDISK_MODE])
7688
      disks.append(disk_dev)
7689

    
7690
  else:
7691
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7692
  return disks
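# Illustration only: for template_name == constants.DT_PLAIN, base_index 0,
# two disk specifications of 1024 and 2048 MiB and no secondary nodes, the
# result is two LD_LV Disk objects named "<uuid>.disk0" and "<uuid>.disk1",
# with iv_names "disk/0" and "disk/1", placed in the cluster volume group
# unless a per-disk vg override is given.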


def _GetInstanceInfoText(instance):
7696
  """Compute that text that should be added to the disk's metadata.
7697

7698
  """
7699
  return "originstname+%s" % instance.name


def _CalcEta(time_taken, written, total_size):
7703
  """Calculates the ETA based on size written and total size.
7704

7705
  @param time_taken: The time taken so far
7706
  @param written: amount written so far
7707
  @param total_size: The total size of data to be written
7708
  @return: The remaining time in seconds
7709

7710
  """
7711
  avg_time = time_taken / float(written)
7712
  return (total_size - written) * avg_time
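# Illustration only, assuming a steady write rate: with 512 MiB of a
# 1024 MiB disk written in 60 seconds, the average time per MiB is
# 60 / 512 seconds, so the remaining 512 MiB need another 60 seconds:
#
#   >>> _CalcEta(60.0, 512, 1024)
#   60.0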


def _WipeDisks(lu, instance):
7716
  """Wipes instance disks.
7717

7718
  @type lu: L{LogicalUnit}
7719
  @param lu: the logical unit on whose behalf we execute
7720
  @type instance: L{objects.Instance}
7721
  @param instance: the instance whose disks we should create
7722
  @return: the success of the wipe
7723

7724
  """
7725
  node = instance.primary_node
7726

    
7727
  for device in instance.disks:
7728
    lu.cfg.SetDiskID(device, node)
7729

    
7730
  logging.info("Pause sync of instance %s disks", instance.name)
7731
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7732

    
7733
  for idx, success in enumerate(result.payload):
7734
    if not success:
7735
      logging.warn("pause-sync of instance %s for disks %d failed",
7736
                   instance.name, idx)
7737

    
7738
  try:
7739
    for idx, device in enumerate(instance.disks):
7740
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7741
      # MAX_WIPE_CHUNK at max
7742
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7743
                            constants.MIN_WIPE_CHUNK_PERCENT)
7744
      # we _must_ make this an int, otherwise rounding errors will
7745
      # occur
7746
      wipe_chunk_size = int(wipe_chunk_size)
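      # Illustration only, assuming the default constants of a 1024 MiB
      # maximum chunk and a 10% minimum chunk percentage: a 5 GiB (5120 MiB)
      # disk is wiped in 512 MiB chunks, while disks of 10 GiB and above are
      # capped at 1024 MiB chunks.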
7747

    
7748
      lu.LogInfo("* Wiping disk %d", idx)
7749
      logging.info("Wiping disk %d for instance %s, node %s using"
7750
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7751

    
7752
      offset = 0
7753
      size = device.size
7754
      last_output = 0
7755
      start_time = time.time()
7756

    
7757
      while offset < size:
7758
        wipe_size = min(wipe_chunk_size, size - offset)
7759
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7760
                      idx, offset, wipe_size)
7761
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7762
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7763
                     (idx, offset, wipe_size))
7764
        now = time.time()
7765
        offset += wipe_size
7766
        if now - last_output >= 60:
7767
          eta = _CalcEta(now - start_time, offset, size)
7768
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7769
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7770
          last_output = now
7771
  finally:
7772
    logging.info("Resume sync of instance %s disks", instance.name)
7773

    
7774
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7775

    
7776
    for idx, success in enumerate(result.payload):
7777
      if not success:
7778
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7779
                      " look at the status and troubleshoot the issue", idx)
7780
        logging.warn("resume-sync of instance %s for disks %d failed",
7781
                     instance.name, idx)
7782

    
7783

    
7784
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7785
  """Create all disks for an instance.
7786

7787
  This abstracts away some work from AddInstance.
7788

7789
  @type lu: L{LogicalUnit}
7790
  @param lu: the logical unit on whose behalf we execute
7791
  @type instance: L{objects.Instance}
7792
  @param instance: the instance whose disks we should create
7793
  @type to_skip: list
7794
  @param to_skip: list of indices to skip
7795
  @type target_node: string
7796
  @param target_node: if passed, overrides the target node for creation
7797
  @rtype: boolean
7798
  @return: the success of the creation
7799

7800
  """
7801
  info = _GetInstanceInfoText(instance)
7802
  if target_node is None:
7803
    pnode = instance.primary_node
7804
    all_nodes = instance.all_nodes
7805
  else:
7806
    pnode = target_node
7807
    all_nodes = [pnode]
7808

    
7809
  if instance.disk_template in constants.DTS_FILEBASED:
7810
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7811
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7812

    
7813
    result.Raise("Failed to create directory '%s' on"
7814
                 " node %s" % (file_storage_dir, pnode))
7815

    
7816
  # Note: this needs to be kept in sync with adding of disks in
7817
  # LUInstanceSetParams
7818
  for idx, device in enumerate(instance.disks):
7819
    if to_skip and idx in to_skip:
7820
      continue
7821
    logging.info("Creating volume %s for instance %s",
7822
                 device.iv_name, instance.name)
7823
    #HARDCODE
7824
    for node in all_nodes:
7825
      f_create = node == pnode
7826
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7827

    
7828

    
7829
def _RemoveDisks(lu, instance, target_node=None):
7830
  """Remove all disks for an instance.
7831

7832
  This abstracts away some work from `AddInstance()` and
7833
  `RemoveInstance()`. Note that in case some of the devices couldn't
7834
  be removed, the removal will continue with the other ones (compare
7835
  with `_CreateDisks()`).
7836

7837
  @type lu: L{LogicalUnit}
7838
  @param lu: the logical unit on whose behalf we execute
7839
  @type instance: L{objects.Instance}
7840
  @param instance: the instance whose disks we should remove
7841
  @type target_node: string
7842
  @param target_node: used to override the node on which to remove the disks
7843
  @rtype: boolean
7844
  @return: the success of the removal
7845

7846
  """
7847
  logging.info("Removing block devices for instance %s", instance.name)
7848

    
7849
  all_result = True
7850
  for device in instance.disks:
7851
    if target_node:
7852
      edata = [(target_node, device)]
7853
    else:
7854
      edata = device.ComputeNodeTree(instance.primary_node)
7855
    for node, disk in edata:
7856
      lu.cfg.SetDiskID(disk, node)
7857
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7858
      if msg:
7859
        lu.LogWarning("Could not remove block device %s on node %s,"
7860
                      " continuing anyway: %s", device.iv_name, node, msg)
7861
        all_result = False
7862

    
7863
  if instance.disk_template == constants.DT_FILE:
7864
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7865
    if target_node:
7866
      tgt = target_node
7867
    else:
7868
      tgt = instance.primary_node
7869
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7870
    if result.fail_msg:
7871
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7872
                    file_storage_dir, instance.primary_node, result.fail_msg)
7873
      all_result = False
7874

    
7875
  return all_result
7876

    
7877

    
7878
def _ComputeDiskSizePerVG(disk_template, disks):
7879
  """Compute disk size requirements in the volume group
7880

7881
  """
7882
  def _compute(disks, payload):
7883
    """Universal algorithm.
7884

7885
    """
7886
    vgs = {}
7887
    for disk in disks:
7888
      vg_name = disk[constants.IDISK_VG]
      vgs[vg_name] = \
        vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload
7890

    
7891
    return vgs
7892

    
7893
  # Required free disk space as a function of disk and swap space
7894
  req_size_dict = {
7895
    constants.DT_DISKLESS: {},
7896
    constants.DT_PLAIN: _compute(disks, 0),
7897
    # 128 MB are added for drbd metadata for each disk
7898
    constants.DT_DRBD8: _compute(disks, 128),
7899
    constants.DT_FILE: {},
7900
    constants.DT_SHARED_FILE: {},
7901
  }
7902

    
7903
  if disk_template not in req_size_dict:
7904
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7905
                                 " is unknown" %  disk_template)
7906

    
7907
  return req_size_dict[disk_template]
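# Illustration only: with the per-VG accumulation above, a DT_DRBD8 request
# for two disks of 1024 and 2048 MiB, both in volume group "xenvg", yields
# {"xenvg": 3328} (each disk contributes its size plus 128 MiB of DRBD
# metadata); the same disks with DT_PLAIN yield {"xenvg": 3072}.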


def _ComputeDiskSize(disk_template, disks):
7911
  """Compute disk size requirements in the volume group
7912

7913
  """
7914
  # Required free disk space as a function of disk and swap space
7915
  req_size_dict = {
7916
    constants.DT_DISKLESS: None,
7917
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7918
    # 128 MB are added for drbd metadata for each disk
7919
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7920
    constants.DT_FILE: None,
7921
    constants.DT_SHARED_FILE: 0,
7922
    constants.DT_BLOCK: 0,
7923
  }
7924

    
7925
  if disk_template not in req_size_dict:
7926
    raise errors.ProgrammerError("Disk template '%s' size requirement"
7927
                                 " is unknown" %  disk_template)
7928

    
7929
  return req_size_dict[disk_template]
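# Illustration only: for two disks of 1024 and 2048 MiB this returns 3072
# for DT_PLAIN, 3328 for DT_DRBD8 (128 MiB of metadata per disk), None for
# DT_DISKLESS and DT_FILE, and 0 for the shared-file and block templates.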


def _FilterVmNodes(lu, nodenames):
7933
  """Filters out non-vm_capable nodes from a list.
7934

7935
  @type lu: L{LogicalUnit}
7936
  @param lu: the logical unit for which we check
7937
  @type nodenames: list
7938
  @param nodenames: the list of nodes on which we should check
7939
  @rtype: list
7940
  @return: the list of vm-capable nodes
7941

7942
  """
7943
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
7945

    
7946

    
7947
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7948
  """Hypervisor parameter validation.
7949

7950
  This function abstracts the hypervisor parameter validation to be
7951
  used in both instance create and instance modify.
7952

7953
  @type lu: L{LogicalUnit}
7954
  @param lu: the logical unit for which we check
7955
  @type nodenames: list
7956
  @param nodenames: the list of nodes on which we should check
7957
  @type hvname: string
7958
  @param hvname: the name of the hypervisor we should use
7959
  @type hvparams: dict
7960
  @param hvparams: the parameters which we need to check
7961
  @raise errors.OpPrereqError: if the parameters are not valid
7962

7963
  """
7964
  nodenames = _FilterVmNodes(lu, nodenames)
7965
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7966
                                                  hvname,
7967
                                                  hvparams)
7968
  for node in nodenames:
7969
    info = hvinfo[node]
7970
    if info.offline:
7971
      continue
7972
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
7973

    
7974

    
7975
def _CheckOSParams(lu, required, nodenames, osname, osparams):
7976
  """OS parameters validation.
7977

7978
  @type lu: L{LogicalUnit}
7979
  @param lu: the logical unit for which we check
7980
  @type required: boolean
7981
  @param required: whether the validation should fail if the OS is not
7982
      found
7983
  @type nodenames: list
7984
  @param nodenames: the list of nodes on which we should check
7985
  @type osname: string
7986
  @param osname: the name of the hypervisor we should use
7987
  @type osparams: dict
7988
  @param osparams: the parameters which we need to check
7989
  @raise errors.OpPrereqError: if the parameters are not valid
7990

7991
  """
7992
  nodenames = _FilterVmNodes(lu, nodenames)
7993
  result = lu.rpc.call_os_validate(required, nodenames, osname,
7994
                                   [constants.OS_VALIDATE_PARAMETERS],
7995
                                   osparams)
7996
  for node, nres in result.items():
7997
    # we don't check for offline cases since this should be run only
7998
    # against the master node and/or an instance's nodes
7999
    nres.Raise("OS Parameters validation failed on node %s" % node)
8000
    if not nres.payload:
8001
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8002
                 osname, node)
8003

    
8004

    
8005
class LUInstanceCreate(LogicalUnit):
8006
  """Create an instance.
8007

8008
  """
8009
  HPATH = "instance-add"
8010
  HTYPE = constants.HTYPE_INSTANCE
8011
  REQ_BGL = False
8012

    
8013
  def CheckArguments(self):
8014
    """Check arguments.
8015

8016
    """
8017
    # do not require name_check to ease forward/backward compatibility
8018
    # for tools
8019
    if self.op.no_install and self.op.start:
8020
      self.LogInfo("No-installation mode selected, disabling startup")
8021
      self.op.start = False
8022
    # validate/normalize the instance name
8023
    self.op.instance_name = \
8024
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8025

    
8026
    if self.op.ip_check and not self.op.name_check:
8027
      # TODO: make the ip check more flexible and not depend on the name check
8028
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8029
                                 " check", errors.ECODE_INVAL)
8030

    
8031
    # check nics' parameter names
8032
    for nic in self.op.nics:
8033
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8034

    
8035
    # check disks. parameter names and consistent adopt/no-adopt strategy
8036
    has_adopt = has_no_adopt = False
8037
    for disk in self.op.disks:
8038
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8039
      if constants.IDISK_ADOPT in disk:
8040
        has_adopt = True
8041
      else:
8042
        has_no_adopt = True
8043
    if has_adopt and has_no_adopt:
8044
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8045
                                 errors.ECODE_INVAL)
8046
    if has_adopt:
8047
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8048
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8049
                                   " '%s' disk template" %
8050
                                   self.op.disk_template,
8051
                                   errors.ECODE_INVAL)
8052
      if self.op.iallocator is not None:
8053
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8054
                                   " iallocator script", errors.ECODE_INVAL)
8055
      if self.op.mode == constants.INSTANCE_IMPORT:
8056
        raise errors.OpPrereqError("Disk adoption not allowed for"
8057
                                   " instance import", errors.ECODE_INVAL)
8058
    else:
8059
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8060
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8061
                                   " but no 'adopt' parameter given" %
8062
                                   self.op.disk_template,
8063
                                   errors.ECODE_INVAL)
8064

    
8065
    self.adopt_disks = has_adopt
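    # Illustration only (hypothetical values): a request such as
    #   disks=[{constants.IDISK_SIZE: 1024, constants.IDISK_ADOPT: "lv0"},
    #          {constants.IDISK_SIZE: 2048, constants.IDISK_ADOPT: "lv1"}]
    # passes the checks above (provided the disk template supports adoption),
    # while mixing entries with and without constants.IDISK_ADOPT, or
    # combining adoption with an iallocator or an import, is rejected.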
8066

    
8067
    # instance name verification
8068
    if self.op.name_check:
8069
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8070
      self.op.instance_name = self.hostname1.name
8071
      # used in CheckPrereq for ip ping check
8072
      self.check_ip = self.hostname1.ip
8073
    else:
8074
      self.check_ip = None
8075

    
8076
    # file storage checks
8077
    if (self.op.file_driver and
8078
        not self.op.file_driver in constants.FILE_DRIVER):
8079
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8080
                                 self.op.file_driver, errors.ECODE_INVAL)
8081

    
8082
    if self.op.disk_template == constants.DT_FILE:
8083
      opcodes.RequireFileStorage()
8084
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8085
      opcodes.RequireSharedFileStorage()
8086

    
8087
    ### Node/iallocator related checks
8088
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8089

    
8090
    if self.op.pnode is not None:
8091
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8092
        if self.op.snode is None:
8093
          raise errors.OpPrereqError("The networked disk templates need"
8094
                                     " a mirror node", errors.ECODE_INVAL)
8095
      elif self.op.snode:
8096
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8097
                        " template")
8098
        self.op.snode = None
8099

    
8100
    self._cds = _GetClusterDomainSecret()
8101

    
8102
    if self.op.mode == constants.INSTANCE_IMPORT:
8103
      # On import force_variant must be True, because if we forced it at
8104
      # initial install, our only chance when importing it back is that it
8105
      # works again!
8106
      self.op.force_variant = True
8107

    
8108
      if self.op.no_install:
8109
        self.LogInfo("No-installation mode has no effect during import")
8110

    
8111
    elif self.op.mode == constants.INSTANCE_CREATE:
8112
      if self.op.os_type is None:
8113
        raise errors.OpPrereqError("No guest OS specified",
8114
                                   errors.ECODE_INVAL)
8115
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8116
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8117
                                   " installation" % self.op.os_type,
8118
                                   errors.ECODE_STATE)
8119
      if self.op.disk_template is None:
8120
        raise errors.OpPrereqError("No disk template specified",
8121
                                   errors.ECODE_INVAL)
8122

    
8123
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8124
      # Check handshake to ensure both clusters have the same domain secret
8125
      src_handshake = self.op.source_handshake
8126
      if not src_handshake:
8127
        raise errors.OpPrereqError("Missing source handshake",
8128
                                   errors.ECODE_INVAL)
8129

    
8130
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8131
                                                           src_handshake)
8132
      if errmsg:
8133
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8134
                                   errors.ECODE_INVAL)
8135

    
8136
      # Load and check source CA
8137
      self.source_x509_ca_pem = self.op.source_x509_ca
8138
      if not self.source_x509_ca_pem:
8139
        raise errors.OpPrereqError("Missing source X509 CA",
8140
                                   errors.ECODE_INVAL)
8141

    
8142
      try:
8143
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8144
                                                    self._cds)
8145
      except OpenSSL.crypto.Error, err:
8146
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8147
                                   (err, ), errors.ECODE_INVAL)
8148

    
8149
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8150
      if errcode is not None:
8151
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8152
                                   errors.ECODE_INVAL)
8153

    
8154
      self.source_x509_ca = cert
8155

    
8156
      src_instance_name = self.op.source_instance_name
8157
      if not src_instance_name:
8158
        raise errors.OpPrereqError("Missing source instance name",
8159
                                   errors.ECODE_INVAL)
8160

    
8161
      self.source_instance_name = \
8162
          netutils.GetHostname(name=src_instance_name).name
8163

    
8164
    else:
8165
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8166
                                 self.op.mode, errors.ECODE_INVAL)
8167

    
8168
  def ExpandNames(self):
8169
    """ExpandNames for CreateInstance.
8170

8171
    Figure out the right locks for instance creation.
8172

8173
    """
8174
    self.needed_locks = {}
8175

    
8176
    instance_name = self.op.instance_name
8177
    # this is just a preventive check, but someone might still add this
8178
    # instance in the meantime, and creation will fail at lock-add time
8179
    if instance_name in self.cfg.GetInstanceList():
8180
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8181
                                 instance_name, errors.ECODE_EXISTS)
8182

    
8183
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8184

    
8185
    if self.op.iallocator:
8186
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8187
    else:
8188
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8189
      nodelist = [self.op.pnode]
8190
      if self.op.snode is not None:
8191
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8192
        nodelist.append(self.op.snode)
8193
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8194

    
8195
    # in case of import lock the source node too
8196
    if self.op.mode == constants.INSTANCE_IMPORT:
8197
      src_node = self.op.src_node
8198
      src_path = self.op.src_path
8199

    
8200
      if src_path is None:
8201
        self.op.src_path = src_path = self.op.instance_name
8202

    
8203
      if src_node is None:
8204
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8205
        self.op.src_node = None
8206
        if os.path.isabs(src_path):
8207
          raise errors.OpPrereqError("Importing an instance from an absolute"
8208
                                     " path requires a source node option",
8209
                                     errors.ECODE_INVAL)
8210
      else:
8211
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8212
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8213
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8214
        if not os.path.isabs(src_path):
8215
          self.op.src_path = src_path = \
8216
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8217

    
8218
  def _RunAllocator(self):
8219
    """Run the allocator based on input opcode.
8220

8221
    """
8222
    nics = [n.ToDict() for n in self.nics]
8223
    ial = IAllocator(self.cfg, self.rpc,
8224
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8225
                     name=self.op.instance_name,
8226
                     disk_template=self.op.disk_template,
8227
                     tags=self.op.tags,
8228
                     os=self.op.os_type,
8229
                     vcpus=self.be_full[constants.BE_VCPUS],
8230
                     memory=self.be_full[constants.BE_MEMORY],
8231
                     disks=self.disks,
8232
                     nics=nics,
8233
                     hypervisor=self.op.hypervisor,
8234
                     )
8235

    
8236
    ial.Run(self.op.iallocator)
8237

    
8238
    if not ial.success:
8239
      raise errors.OpPrereqError("Can't compute nodes using"
8240
                                 " iallocator '%s': %s" %
8241
                                 (self.op.iallocator, ial.info),
8242
                                 errors.ECODE_NORES)
8243
    if len(ial.result) != ial.required_nodes:
8244
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8245
                                 " of nodes (%s), required %s" %
8246
                                 (self.op.iallocator, len(ial.result),
8247
                                  ial.required_nodes), errors.ECODE_FAULT)
8248
    self.op.pnode = ial.result[0]
8249
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8250
                 self.op.instance_name, self.op.iallocator,
8251
                 utils.CommaJoin(ial.result))
8252
    if ial.required_nodes == 2:
8253
      self.op.snode = ial.result[1]
8254

    
8255
  def BuildHooksEnv(self):
8256
    """Build hooks env.
8257

8258
    This runs on master, primary and secondary nodes of the instance.
8259

8260
    """
8261
    env = {
8262
      "ADD_MODE": self.op.mode,
8263
      }
8264
    if self.op.mode == constants.INSTANCE_IMPORT:
8265
      env["SRC_NODE"] = self.op.src_node
8266
      env["SRC_PATH"] = self.op.src_path
8267
      env["SRC_IMAGES"] = self.src_images
8268

    
8269
    env.update(_BuildInstanceHookEnv(
8270
      name=self.op.instance_name,
8271
      primary_node=self.op.pnode,
8272
      secondary_nodes=self.secondaries,
8273
      status=self.op.start,
8274
      os_type=self.op.os_type,
8275
      memory=self.be_full[constants.BE_MEMORY],
8276
      vcpus=self.be_full[constants.BE_VCPUS],
8277
      nics=_NICListToTuple(self, self.nics),
8278
      disk_template=self.op.disk_template,
8279
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8280
             for d in self.disks],
8281
      bep=self.be_full,
8282
      hvp=self.hv_full,
8283
      hypervisor_name=self.op.hypervisor,
8284
      tags=self.op.tags,
8285
    ))
8286

    
8287
    return env
8288

    
8289
  def BuildHooksNodes(self):
8290
    """Build hooks nodes.
8291

8292
    """
8293
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8294
    return nl, nl
8295

    
8296
  def _ReadExportInfo(self):
8297
    """Reads the export information from disk.
8298

8299
    It will override the opcode source node and path with the actual
8300
    information, if these two were not specified before.
8301

8302
    @return: the export information
8303

8304
    """
8305
    assert self.op.mode == constants.INSTANCE_IMPORT
8306

    
8307
    src_node = self.op.src_node
8308
    src_path = self.op.src_path
8309

    
8310
    if src_node is None:
8311
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
8312
      exp_list = self.rpc.call_export_list(locked_nodes)
8313
      found = False
8314
      for node in exp_list:
8315
        if exp_list[node].fail_msg:
8316
          continue
8317
        if src_path in exp_list[node].payload:
8318
          found = True
8319
          self.op.src_node = src_node = node
8320
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8321
                                                       src_path)
8322
          break
8323
      if not found:
8324
        raise errors.OpPrereqError("No export found for relative path %s" %
8325
                                    src_path, errors.ECODE_INVAL)
8326

    
8327
    _CheckNodeOnline(self, src_node)
8328
    result = self.rpc.call_export_info(src_node, src_path)
8329
    result.Raise("No export or invalid export found in dir %s" % src_path)
8330

    
8331
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8332
    if not export_info.has_section(constants.INISECT_EXP):
8333
      raise errors.ProgrammerError("Corrupted export config",
8334
                                   errors.ECODE_ENVIRON)
8335

    
8336
    ei_version = export_info.get(constants.INISECT_EXP, "version")
8337
    if (int(ei_version) != constants.EXPORT_VERSION):
8338
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8339
                                 (ei_version, constants.EXPORT_VERSION),
8340
                                 errors.ECODE_ENVIRON)
8341
    return export_info
8342

    
8343
  def _ReadExportParams(self, einfo):
8344
    """Use export parameters as defaults.
8345

8346
    If the opcode does not override some instance parameters, try to take
    them from the export information, if it declares them.
8349

8350
    """
8351
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8352

    
8353
    if self.op.disk_template is None:
8354
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
8355
        self.op.disk_template = einfo.get(constants.INISECT_INS,
8356
                                          "disk_template")
8357
      else:
8358
        raise errors.OpPrereqError("No disk template specified and the export"
8359
                                   " is missing the disk_template information",
8360
                                   errors.ECODE_INVAL)
8361

    
8362
    if not self.op.disks:
8363
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
8364
        disks = []
8365
        # TODO: import the disk iv_name too
8366
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8367
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8368
          disks.append({constants.IDISK_SIZE: disk_sz})
8369
        self.op.disks = disks
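        # Illustrative sketch only (hypothetical values): an export whose
        # instance section contains "disk_count = 2", "disk0_size = 10240"
        # and "disk1_size = 2048" yields
        # [{constants.IDISK_SIZE: 10240}, {constants.IDISK_SIZE: 2048}].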
8370
      else:
8371
        raise errors.OpPrereqError("No disk info specified and the export"
8372
                                   " is missing the disk information",
8373
                                   errors.ECODE_INVAL)
8374

    
8375
    if (not self.op.nics and
8376
        einfo.has_option(constants.INISECT_INS, "nic_count")):
8377
      nics = []
8378
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8379
        ndict = {}
8380
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8381
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8382
          ndict[name] = v
8383
        nics.append(ndict)
8384
      self.op.nics = nics
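      # Each ndict gathers the exported nic<N>_<param> values (the
      # NICS_PARAMETERS plus "ip" and "mac") for one NIC, mirroring the dict
      # layout the opcode would normally carry.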
8385

    
8386
    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8387
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8388

    
8389
    if (self.op.hypervisor is None and
8390
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
8391
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8392

    
8393
    if einfo.has_section(constants.INISECT_HYP):
8394
      # use the export parameters but do not override the ones
8395
      # specified by the user
8396
      for name, value in einfo.items(constants.INISECT_HYP):
8397
        if name not in self.op.hvparams:
8398
          self.op.hvparams[name] = value
8399

    
8400
    if einfo.has_section(constants.INISECT_BEP):
8401
      # use the parameters, without overriding
8402
      for name, value in einfo.items(constants.INISECT_BEP):
8403
        if name not in self.op.beparams:
8404
          self.op.beparams[name] = value
8405
    else:
8406
      # try to read the parameters old style, from the main section
8407
      for name in constants.BES_PARAMETERS:
8408
        if (name not in self.op.beparams and
8409
            einfo.has_option(constants.INISECT_INS, name)):
8410
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8411

    
8412
    if einfo.has_section(constants.INISECT_OSP):
8413
      # use the parameters, without overriding
8414
      for name, value in einfo.items(constants.INISECT_OSP):
8415
        if name not in self.op.osparams:
8416
          self.op.osparams[name] = value
8417

    
8418
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

  def _CalculateFileStorageDir(self):
8445
    """Calculate final instance file storage dir.
8446

8447
    """
8448
    # file storage dir calculation/check
8449
    self.instance_file_storage_dir = None
8450
    if self.op.disk_template in constants.DTS_FILEBASED:
8451
      # build the full file storage dir path
8452
      joinargs = []
8453

    
8454
      if self.op.disk_template == constants.DT_SHARED_FILE:
8455
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
8456
      else:
8457
        get_fsd_fn = self.cfg.GetFileStorageDir
8458

    
8459
      cfg_storagedir = get_fsd_fn()
8460
      if not cfg_storagedir:
8461
        raise errors.OpPrereqError("Cluster file storage dir not defined")
8462
      joinargs.append(cfg_storagedir)
8463

    
8464
      if self.op.file_storage_dir is not None:
8465
        joinargs.append(self.op.file_storage_dir)
8466

    
8467
      joinargs.append(self.op.instance_name)
8468

    
8469
      # pylint: disable-msg=W0142
8470
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8471

    
8472
  def CheckPrereq(self):
8473
    """Check prerequisites.
8474

8475
    """
8476
    self._CalculateFileStorageDir()
8477

    
8478
    if self.op.mode == constants.INSTANCE_IMPORT:
8479
      export_info = self._ReadExportInfo()
8480
      self._ReadExportParams(export_info)
8481

    
8482
    if (not self.cfg.GetVGName() and
8483
        self.op.disk_template not in constants.DTS_NOT_LVM):
8484
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8485
                                 " instances", errors.ECODE_STATE)
8486

    
8487
    if self.op.hypervisor is None:
8488
      self.op.hypervisor = self.cfg.GetHypervisorType()
8489

    
8490
    cluster = self.cfg.GetClusterInfo()
8491
    enabled_hvs = cluster.enabled_hypervisors
8492
    if self.op.hypervisor not in enabled_hvs:
8493
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8494
                                 " cluster (%s)" % (self.op.hypervisor,
8495
                                  ",".join(enabled_hvs)),
8496
                                 errors.ECODE_STATE)
8497

    
8498
    # Check tag validity
8499
    for tag in self.op.tags:
8500
      objects.TaggableObject.ValidateTag(tag)
8501

    
8502
    # check hypervisor parameter syntax (locally)
8503
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8504
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8505
                                      self.op.hvparams)
8506
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8507
    hv_type.CheckParameterSyntax(filled_hvp)
8508
    self.hv_full = filled_hvp
8509
    # check that we don't specify global parameters on an instance
8510
    _CheckGlobalHvParams(self.op.hvparams)
8511

    
8512
    # fill and remember the beparams dict
8513
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8514
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8515

    
8516
    # build os parameters
8517
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8518

    
8519
    # now that hvp/bep are in final format, let's reset to defaults,
8520
    # if told to do so
8521
    if self.op.identify_defaults:
8522
      self._RevertToDefaults(cluster)
8523

    
8524
    # NIC buildup
8525
    self.nics = []
8526
    for idx, nic in enumerate(self.op.nics):
8527
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8528
      nic_mode = nic_mode_req
8529
      if nic_mode is None:
8530
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8531

    
8532
      # in routed mode, for the first nic, the default ip is 'auto'
8533
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8534
        default_ip_mode = constants.VALUE_AUTO
8535
      else:
8536
        default_ip_mode = constants.VALUE_NONE
8537

    
8538
      # ip validity checks
8539
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8540
      if ip is None or ip.lower() == constants.VALUE_NONE:
8541
        nic_ip = None
8542
      elif ip.lower() == constants.VALUE_AUTO:
8543
        if not self.op.name_check:
8544
          raise errors.OpPrereqError("IP address set to auto but name checks"
8545
                                     " have been skipped",
8546
                                     errors.ECODE_INVAL)
8547
        nic_ip = self.hostname1.ip
8548
      else:
8549
        if not netutils.IPAddress.IsValid(ip):
8550
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8551
                                     errors.ECODE_INVAL)
8552
        nic_ip = ip
8553

    
8554
      # TODO: check the ip address for uniqueness
8555
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8556
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8557
                                   errors.ECODE_INVAL)
8558

    
8559
      # MAC address verification
8560
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8561
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8562
        mac = utils.NormalizeAndValidateMac(mac)
8563

    
8564
        try:
8565
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8566
        except errors.ReservationError:
8567
          raise errors.OpPrereqError("MAC address %s already in use"
8568
                                     " in cluster" % mac,
8569
                                     errors.ECODE_NOTUNIQUE)
8570

    
8571
      #  Build nic parameters
8572
      link = nic.get(constants.INIC_LINK, None)
8573
      nicparams = {}
8574
      if nic_mode_req:
8575
        nicparams[constants.NIC_MODE] = nic_mode_req
8576
      if link:
8577
        nicparams[constants.NIC_LINK] = link
8578

    
8579
      check_params = cluster.SimpleFillNIC(nicparams)
8580
      objects.NIC.CheckParameterSyntax(check_params)
8581
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
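      # nicparams only carries the explicitly requested mode/link; cluster
      # defaults are applied later, and SimpleFillNIC above is used purely
      # for syntax checking.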
8582

    
8583
    # disk checks/pre-build
8584
    default_vg = self.cfg.GetVGName()
8585
    self.disks = []
8586
    for disk in self.op.disks:
8587
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8588
      if mode not in constants.DISK_ACCESS_SET:
8589
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8590
                                   mode, errors.ECODE_INVAL)
8591
      size = disk.get(constants.IDISK_SIZE, None)
8592
      if size is None:
8593
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8594
      try:
8595
        size = int(size)
8596
      except (TypeError, ValueError):
8597
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8598
                                   errors.ECODE_INVAL)
8599

    
8600
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8601
      new_disk = {
8602
        constants.IDISK_SIZE: size,
8603
        constants.IDISK_MODE: mode,
8604
        constants.IDISK_VG: data_vg,
8605
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8606
        }
8607
      if constants.IDISK_ADOPT in disk:
8608
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8609
      self.disks.append(new_disk)
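      # Illustrative example of one resulting entry (hypothetical values):
      #   {constants.IDISK_SIZE: 10240, constants.IDISK_MODE: "rw",
      #    constants.IDISK_VG: "xenvg", constants.IDISK_METAVG: "xenvg"}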
8610

    
8611
    if self.op.mode == constants.INSTANCE_IMPORT:
8612

    
8613
      # Check that the new instance doesn't have less disks than the export
8614
      instance_disks = len(self.disks)
8615
      export_disks = export_info.getint(constants.INISECT_INS, "disk_count")
8616
      if instance_disks < export_disks:
8617
        raise errors.OpPrereqError("Not enough disks to import."
8618
                                   " (instance: %d, export: %d)" %
8619
                                   (instance_disks, export_disks),
8620
                                   errors.ECODE_INVAL)
8621

    
8622
      disk_images = []
8623
      for idx in range(export_disks):
8624
        option = "disk%d_dump" % idx
8625
        if export_info.has_option(constants.INISECT_INS, option):
8626
          # FIXME: are the old os-es, disk sizes, etc. useful?
8627
          export_name = export_info.get(constants.INISECT_INS, option)
8628
          image = utils.PathJoin(self.op.src_path, export_name)
8629
          disk_images.append(image)
8630
        else:
8631
          disk_images.append(False)
8632

    
8633
      self.src_images = disk_images
8634

    
8635
      old_name = export_info.get(constants.INISECT_INS, "name")
8636
      try:
8637
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8638
      except (TypeError, ValueError), err:
8639
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8640
                                   " an integer: %s" % str(err),
8641
                                   errors.ECODE_STATE)
8642
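      # When re-importing under the original instance name, reuse the MAC
      # addresses recorded in the export for NICs still set to "auto".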
      if self.op.instance_name == old_name:
8643
        for idx, nic in enumerate(self.nics):
8644
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8645
            nic_mac_ini = "nic%d_mac" % idx
8646
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8647

    
8648
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8649

    
8650
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8651
    if self.op.ip_check:
8652
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8653
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8654
                                   (self.check_ip, self.op.instance_name),
8655
                                   errors.ECODE_NOTUNIQUE)
8656

    
8657
    #### mac address generation
8658
    # By generating here the mac address both the allocator and the hooks get
8659
    # the real final mac address rather than the 'auto' or 'generate' value.
8660
    # There is a race condition between the generation and the instance object
8661
    # creation, which means that we know the mac is valid now, but we're not
8662
    # sure it will be when we actually add the instance. If things go bad
8663
    # adding the instance will abort because of a duplicate mac, and the
8664
    # creation job will fail.
8665
    for nic in self.nics:
8666
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8667
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8668

    
8669
    #### allocator run
8670

    
8671
    if self.op.iallocator is not None:
8672
      self._RunAllocator()
8673

    
8674
    #### node related checks
8675

    
8676
    # check primary node
8677
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8678
    assert self.pnode is not None, \
8679
      "Cannot retrieve locked node %s" % self.op.pnode
8680
    if pnode.offline:
8681
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8682
                                 pnode.name, errors.ECODE_STATE)
8683
    if pnode.drained:
8684
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8685
                                 pnode.name, errors.ECODE_STATE)
8686
    if not pnode.vm_capable:
8687
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8688
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8689

    
8690
    self.secondaries = []
8691

    
8692
    # mirror node verification
8693
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8694
      if self.op.snode == pnode.name:
8695
        raise errors.OpPrereqError("The secondary node cannot be the"
8696
                                   " primary node", errors.ECODE_INVAL)
8697
      _CheckNodeOnline(self, self.op.snode)
8698
      _CheckNodeNotDrained(self, self.op.snode)
8699
      _CheckNodeVmCapable(self, self.op.snode)
8700
      self.secondaries.append(self.op.snode)
8701

    
8702
    nodenames = [pnode.name] + self.secondaries
8703

    
8704
    if not self.adopt_disks:
8705
      # Check lv size requirements, if not adopting
8706
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8707
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8708

    
8709
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8710
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8711
                                disk[constants.IDISK_ADOPT])
8712
                     for disk in self.disks])
8713
      if len(all_lvs) != len(self.disks):
8714
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8715
                                   errors.ECODE_INVAL)
8716
      for lv_name in all_lvs:
8717
        try:
8718
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8719
          # to ReserveLV uses the same syntax
8720
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8721
        except errors.ReservationError:
8722
          raise errors.OpPrereqError("LV named %s used by another instance" %
8723
                                     lv_name, errors.ECODE_NOTUNIQUE)
8724

    
8725
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8726
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8727

    
8728
      node_lvs = self.rpc.call_lv_list([pnode.name],
8729
                                       vg_names.payload.keys())[pnode.name]
8730
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8731
      node_lvs = node_lvs.payload
8732

    
8733
      delta = all_lvs.difference(node_lvs.keys())
8734
      if delta:
8735
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8736
                                   utils.CommaJoin(delta),
8737
                                   errors.ECODE_INVAL)
8738
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8739
      if online_lvs:
8740
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8741
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8742
                                   errors.ECODE_STATE)
8743
      # update the size of disk based on what is found
8744
      for dsk in self.disks:
8745
        dsk[constants.IDISK_SIZE] = \
8746
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8747
                                        dsk[constants.IDISK_ADOPT])][0]))
8748

    
8749
    elif self.op.disk_template == constants.DT_BLOCK:
8750
      # Normalize and de-duplicate device paths
8751
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8752
                       for disk in self.disks])
8753
      if len(all_disks) != len(self.disks):
8754
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8755
                                   errors.ECODE_INVAL)
8756
      baddisks = [d for d in all_disks
8757
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8758
      if baddisks:
8759
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8760
                                   " cannot be adopted" %
8761
                                   (", ".join(baddisks),
8762
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8763
                                   errors.ECODE_INVAL)
8764

    
8765
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8766
                                            list(all_disks))[pnode.name]
8767
      node_disks.Raise("Cannot get block device information from node %s" %
8768
                       pnode.name)
8769
      node_disks = node_disks.payload
8770
      delta = all_disks.difference(node_disks.keys())
8771
      if delta:
8772
        raise errors.OpPrereqError("Missing block device(s): %s" %
8773
                                   utils.CommaJoin(delta),
8774
                                   errors.ECODE_INVAL)
8775
      for dsk in self.disks:
8776
        dsk[constants.IDISK_SIZE] = \
8777
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8778

    
8779
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8780

    
8781
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8782
    # check OS parameters (remotely)
8783
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8784

    
8785
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8786

    
8787
    # memory check on primary node
8788
    if self.op.start:
8789
      _CheckNodeFreeMemory(self, self.pnode.name,
8790
                           "creating instance %s" % self.op.instance_name,
8791
                           self.be_full[constants.BE_MEMORY],
8792
                           self.op.hypervisor)
8793

    
8794
    self.dry_run_result = list(nodenames)
8795

    
8796
  def Exec(self, feedback_fn):
8797
    """Create and add the instance to the cluster.
8798

8799
    """
8800
    instance = self.op.instance_name
8801
    pnode_name = self.pnode.name
8802

    
8803
    ht_kind = self.op.hypervisor
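    # Hypervisors in HTS_REQ_PORT (e.g. those exposing a VNC console) need a
    # cluster-unique network port reserved for the instance.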
8804
    if ht_kind in constants.HTS_REQ_PORT:
8805
      network_port = self.cfg.AllocatePort()
8806
    else:
8807
      network_port = None
8808

    
8809
    disks = _GenerateDiskTemplate(self,
8810
                                  self.op.disk_template,
8811
                                  instance, pnode_name,
8812
                                  self.secondaries,
8813
                                  self.disks,
8814
                                  self.instance_file_storage_dir,
8815
                                  self.op.file_driver,
8816
                                  0,
8817
                                  feedback_fn)
8818

    
8819
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8820
                            primary_node=pnode_name,
8821
                            nics=self.nics, disks=disks,
8822
                            disk_template=self.op.disk_template,
8823
                            admin_up=False,
8824
                            network_port=network_port,
8825
                            beparams=self.op.beparams,
8826
                            hvparams=self.op.hvparams,
8827
                            hypervisor=self.op.hypervisor,
8828
                            osparams=self.op.osparams,
8829
                            )
8830

    
8831
    if self.op.tags:
8832
      for tag in self.op.tags:
8833
        iobj.AddTag(tag)
8834

    
8835
    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
8849
      feedback_fn("* creating instance disks...")
8850
      try:
8851
        _CreateDisks(self, iobj)
8852
      except errors.OpExecError:
8853
        self.LogWarning("Device creation failed, reverting...")
8854
        try:
8855
          _RemoveDisks(self, iobj)
8856
        finally:
8857
          self.cfg.ReleaseDRBDMinors(instance)
8858
          raise
8859

    
8860
    feedback_fn("adding instance %s to cluster config" % instance)
8861

    
8862
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8863

    
8864
    # Declare that we don't want to remove the instance lock anymore, as we've
8865
    # added the instance to the config
8866
    del self.remove_locks[locking.LEVEL_INSTANCE]
8867

    
8868
    if self.op.mode == constants.INSTANCE_IMPORT:
8869
      # Release unused nodes
8870
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8871
    else:
8872
      # Release all nodes
8873
      _ReleaseLocks(self, locking.LEVEL_NODE)
8874

    
8875
    disk_abort = False
8876
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8877
      feedback_fn("* wiping instance disks...")
8878
      try:
8879
        _WipeDisks(self, iobj)
8880
      except errors.OpExecError, err:
8881
        logging.exception("Wiping disks failed")
8882
        self.LogWarning("Wiping instance disks failed (%s)", err)
8883
        disk_abort = True
8884

    
8885
    if disk_abort:
8886
      # Something is already wrong with the disks, don't do anything else
8887
      pass
8888
    elif self.op.wait_for_sync:
8889
      disk_abort = not _WaitForSync(self, iobj)
8890
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8891
      # make sure the disks are not degraded (still sync-ing is ok)
8892
      feedback_fn("* checking mirrors status")
8893
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8894
    else:
8895
      disk_abort = False
8896

    
8897
    if disk_abort:
8898
      _RemoveDisks(self, iobj)
8899
      self.cfg.RemoveInstance(iobj.name)
8900
      # Make sure the instance lock gets removed
8901
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8902
      raise errors.OpExecError("There are some degraded disks for"
8903
                               " this instance")
8904

    
8905
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8906
      if self.op.mode == constants.INSTANCE_CREATE:
8907
        if not self.op.no_install:
8908
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8909
                        not self.op.wait_for_sync)
8910
          if pause_sync:
8911
            feedback_fn("* pausing disk sync to install instance OS")
8912
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8913
                                                              iobj.disks, True)
8914
            for idx, success in enumerate(result.payload):
8915
              if not success:
8916
                logging.warn("pause-sync of instance %s for disk %d failed",
8917
                             instance, idx)
8918

    
8919
          feedback_fn("* running the instance OS create scripts...")
8920
          # FIXME: pass debug option from opcode to backend
8921
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8922
                                                 self.op.debug_level)
8923
          if pause_sync:
8924
            feedback_fn("* resuming disk sync")
8925
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8926
                                                              iobj.disks, False)
8927
            for idx, success in enumerate(result.payload):
8928
              if not success:
8929
                logging.warn("resume-sync of instance %s for disk %d failed",
8930
                             instance, idx)
8931

    
8932
          result.Raise("Could not add os for instance %s"
8933
                       " on node %s" % (instance, pnode_name))
8934

    
8935
      elif self.op.mode == constants.INSTANCE_IMPORT:
8936
        feedback_fn("* running the instance OS import scripts...")
8937

    
8938
        transfers = []
8939

    
8940
        for idx, image in enumerate(self.src_images):
8941
          if not image:
8942
            continue
8943

    
8944
          # FIXME: pass debug option from opcode to backend
8945
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8946
                                             constants.IEIO_FILE, (image, ),
8947
                                             constants.IEIO_SCRIPT,
8948
                                             (iobj.disks[idx], idx),
8949
                                             None)
8950
          transfers.append(dt)
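          # Each transfer feeds one exported dump file (IEIO_FILE) into the
          # matching instance disk through the OS import script (IEIO_SCRIPT).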
8951

    
8952
        import_result = \
8953
          masterd.instance.TransferInstanceData(self, feedback_fn,
8954
                                                self.op.src_node, pnode_name,
8955
                                                self.pnode.secondary_ip,
8956
                                                iobj, transfers)
8957
        if not compat.all(import_result):
8958
          self.LogWarning("Some disks for instance %s on node %s were not"
8959
                          " imported successfully" % (instance, pnode_name))
8960

    
8961
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8962
        feedback_fn("* preparing remote import...")
8963
        # The source cluster will stop the instance before attempting to make a
8964
        # connection. In some cases stopping an instance can take a long time,
8965
        # hence the shutdown timeout is added to the connection timeout.
8966
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8967
                           self.op.source_shutdown_timeout)
8968
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8969

    
8970
        assert iobj.primary_node == self.pnode.name
8971
        disk_results = \
8972
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8973
                                        self.source_x509_ca,
8974
                                        self._cds, timeouts)
8975
        if not compat.all(disk_results):
8976
          # TODO: Should the instance still be started, even if some disks
8977
          # failed to import (valid for local imports, too)?
8978
          self.LogWarning("Some disks for instance %s on node %s were not"
8979
                          " imported successfully" % (instance, pnode_name))
8980

    
8981
        # Run rename script on newly imported instance
8982
        assert iobj.name == instance
8983
        feedback_fn("Running rename script for %s" % instance)
8984
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8985
                                                   self.source_instance_name,
8986
                                                   self.op.debug_level)
8987
        if result.fail_msg:
8988
          self.LogWarning("Failed to run rename script for %s on node"
8989
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8990

    
8991
      else:
8992
        # also checked in the prereq part
8993
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8994
                                     % self.op.mode)
8995

    
8996
    if self.op.start:
8997
      iobj.admin_up = True
8998
      self.cfg.Update(iobj, feedback_fn)
8999
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9000
      feedback_fn("* starting instance...")
9001
      result = self.rpc.call_instance_start(pnode_name, iobj,
9002
                                            None, None, False)
9003
      result.Raise("Could not start instance")
9004

    
9005
    return list(iobj.all_nodes)
9006

    
9007

    
9008
class LUInstanceConsole(NoHooksLU):
9009
  """Connect to an instance's console.
9010

9011
  This is somewhat special in that it returns the command line that
9012
  you need to run on the master node in order to connect to the
9013
  console.
9014

9015
  """
9016
  REQ_BGL = False
9017

    
9018
  def ExpandNames(self):
9019
    self._ExpandAndLockInstance()
9020

    
9021
  def CheckPrereq(self):
9022
    """Check prerequisites.
9023

9024
    This checks that the instance is in the cluster.
9025

9026
    """
9027
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9028
    assert self.instance is not None, \
9029
      "Cannot retrieve locked instance %s" % self.op.instance_name
9030
    _CheckNodeOnline(self, self.instance.primary_node)
9031

    
9032
  def Exec(self, feedback_fn):
9033
    """Connect to the console of an instance
9034

9035
    """
9036
    instance = self.instance
9037
    node = instance.primary_node
9038

    
9039
    node_insts = self.rpc.call_instance_list([node],
9040
                                             [instance.hypervisor])[node]
9041
    node_insts.Raise("Can't get node information from %s" % node)
9042

    
9043
    if instance.name not in node_insts.payload:
9044
      if instance.admin_up:
9045
        state = constants.INSTST_ERRORDOWN
9046
      else:
9047
        state = constants.INSTST_ADMINDOWN
9048
      raise errors.OpExecError("Instance %s is not running (state %s)" %
9049
                               (instance.name, state))
9050

    
9051
    logging.debug("Connecting to console of %s on %s", instance.name, node)
9052

    
9053
    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9054

    
9055

    
9056
def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


class LUInstanceReplaceDisks(LogicalUnit):
9078
  """Replace the disks of an instance.
9079

9080
  """
9081
  HPATH = "mirrors-replace"
9082
  HTYPE = constants.HTYPE_INSTANCE
9083
  REQ_BGL = False
9084

    
9085
  def CheckArguments(self):
9086
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9087
                                  self.op.iallocator)
9088

    
9089
  def ExpandNames(self):
9090
    self._ExpandAndLockInstance()
9091

    
9092
    assert locking.LEVEL_NODE not in self.needed_locks
9093
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9094

    
9095
    assert self.op.iallocator is None or self.op.remote_node is None, \
9096
      "Conflicting options"
9097

    
9098
    if self.op.remote_node is not None:
9099
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9100

    
9101
      # Warning: do not remove the locking of the new secondary here
9102
      # unless DRBD8.AddChildren is changed to work in parallel;
9103
      # currently it doesn't since parallel invocations of
9104
      # FindUnusedMinor will conflict
9105
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9106
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9107
    else:
9108
      self.needed_locks[locking.LEVEL_NODE] = []
9109
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9110

    
9111
      if self.op.iallocator is not None:
9112
        # iallocator will select a new node in the same group
9113
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9114

    
9115
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9116
                                   self.op.iallocator, self.op.remote_node,
9117
                                   self.op.disks, False, self.op.early_release)
9118

    
9119
    self.tasklets = [self.replacer]
9120

    
9121
  def DeclareLocks(self, level):
9122
    if level == locking.LEVEL_NODEGROUP:
9123
      assert self.op.remote_node is None
9124
      assert self.op.iallocator is not None
9125
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9126

    
9127
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9128
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9129
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9130

    
9131
    elif level == locking.LEVEL_NODE:
9132
      if self.op.iallocator is not None:
9133
        assert self.op.remote_node is None
9134
        assert not self.needed_locks[locking.LEVEL_NODE]
9135

    
9136
        # Lock member nodes of all locked groups
9137
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9138
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9139
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9140
      else:
9141
        self._LockInstancesNodes()
9142

    
9143
  def BuildHooksEnv(self):
9144
    """Build hooks env.
9145

9146
    This runs on the master, the primary and all the secondaries.
9147

9148
    """
9149
    instance = self.replacer.instance
9150
    env = {
9151
      "MODE": self.op.mode,
9152
      "NEW_SECONDARY": self.op.remote_node,
9153
      "OLD_SECONDARY": instance.secondary_nodes[0],
9154
      }
9155
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9156
    return env
9157

    
9158
  def BuildHooksNodes(self):
9159
    """Build hooks nodes.
9160

9161
    """
9162
    instance = self.replacer.instance
9163
    nl = [
9164
      self.cfg.GetMasterNode(),
9165
      instance.primary_node,
9166
      ]
9167
    if self.op.remote_node is not None:
9168
      nl.append(self.op.remote_node)
9169
    return nl, nl
9170

    
9171
  def CheckPrereq(self):
9172
    """Check prerequisites.
9173

9174
    """
9175
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9176
            self.op.iallocator is None)
9177

    
9178
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9179
    if owned_groups:
9180
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9181

    
9182
    return LogicalUnit.CheckPrereq(self)
9183

    
9184

    
9185
class TLReplaceDisks(Tasklet):
9186
  """Replaces disks for an instance.
9187

9188
  Note: Locking is not within the scope of this class.
9189

9190
  """
9191
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9192
               disks, delay_iallocator, early_release):
9193
    """Initializes this class.
9194

9195
    """
9196
    Tasklet.__init__(self, lu)
9197

    
9198
    # Parameters
9199
    self.instance_name = instance_name
9200
    self.mode = mode
9201
    self.iallocator_name = iallocator_name
9202
    self.remote_node = remote_node
9203
    self.disks = disks
9204
    self.delay_iallocator = delay_iallocator
9205
    self.early_release = early_release
9206

    
9207
    # Runtime data
9208
    self.instance = None
9209
    self.new_node = None
9210
    self.target_node = None
9211
    self.other_node = None
9212
    self.remote_node_info = None
9213
    self.node_secondary_ip = None
9214

    
9215
  @staticmethod
9216
  def CheckArguments(mode, remote_node, iallocator):
9217
    """Helper function for users of this class.
9218

9219
    """
9220
    # check for valid parameter combination
9221
    if mode == constants.REPLACE_DISK_CHG:
9222
      if remote_node is None and iallocator is None:
9223
        raise errors.OpPrereqError("When changing the secondary either an"
9224
                                   " iallocator script must be used or the"
9225
                                   " new node given", errors.ECODE_INVAL)
9226

    
9227
      if remote_node is not None and iallocator is not None:
9228
        raise errors.OpPrereqError("Give either the iallocator or the new"
9229
                                   " secondary, not both", errors.ECODE_INVAL)
9230

    
9231
    elif remote_node is not None or iallocator is not None:
9232
      # Not replacing the secondary
9233
      raise errors.OpPrereqError("The iallocator and new node options can"
9234
                                 " only be used when changing the"
9235
                                 " secondary node", errors.ECODE_INVAL)
9236

    
9237
  @staticmethod
9238
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9239
    """Compute a new secondary node using an IAllocator.
9240

9241
    """
9242
    ial = IAllocator(lu.cfg, lu.rpc,
9243
                     mode=constants.IALLOCATOR_MODE_RELOC,
9244
                     name=instance_name,
9245
                     relocate_from=list(relocate_from))
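    # In relocation mode the allocator only needs the instance name and the
    # node(s) to move away from; its result is the new secondary node.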
9246

    
9247
    ial.Run(iallocator_name)
9248

    
9249
    if not ial.success:
9250
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9251
                                 " %s" % (iallocator_name, ial.info),
9252
                                 errors.ECODE_NORES)
9253

    
9254
    if len(ial.result) != ial.required_nodes:
9255
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9256
                                 " of nodes (%s), required %s" %
9257
                                 (iallocator_name,
9258
                                  len(ial.result), ial.required_nodes),
9259
                                 errors.ECODE_FAULT)
9260

    
9261
    remote_node_name = ial.result[0]
9262

    
9263
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9264
               instance_name, remote_node_name)
9265

    
9266
    return remote_node_name
9267

    
9268
  def _FindFaultyDisks(self, node_name):
9269
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9270
                                    node_name, True)
9271

    
9272
  def _CheckDisksActivated(self, instance):
9273
    """Checks if the instance disks are activated.
9274

9275
    @param instance: The instance to check disks
9276
    @return: True if they are activated, False otherwise
9277

9278
    """
9279
    nodes = instance.all_nodes
9280

    
9281
    for idx, dev in enumerate(instance.disks):
9282
      for node in nodes:
9283
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9284
        self.cfg.SetDiskID(dev, node)
9285

    
9286
        result = self.rpc.call_blockdev_find(node, dev)
9287

    
9288
        if result.offline:
9289
          continue
9290
        elif result.fail_msg or not result.payload:
9291
          return False
9292

    
9293
    return True
9294

    
9295
  def CheckPrereq(self):
9296
    """Check prerequisites.
9297

9298
    This checks that the instance is in the cluster.
9299

9300
    """
9301
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9302
    assert instance is not None, \
9303
      "Cannot retrieve locked instance %s" % self.instance_name
9304

    
9305
    if instance.disk_template != constants.DT_DRBD8:
9306
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9307
                                 " instances", errors.ECODE_INVAL)
9308

    
9309
    if len(instance.secondary_nodes) != 1:
9310
      raise errors.OpPrereqError("The instance has a strange layout,"
9311
                                 " expected one secondary but found %d" %
9312
                                 len(instance.secondary_nodes),
9313
                                 errors.ECODE_FAULT)
9314

    
9315
    if not self.delay_iallocator:
9316
      self._CheckPrereq2()
9317

    
9318
  def _CheckPrereq2(self):
9319
    """Check prerequisites, second part.
9320

9321
    This function should always be part of CheckPrereq. It was separated and is
9322
    now called from Exec because during node evacuation iallocator was only
9323
    called with an unmodified cluster model, not taking planned changes into
9324
    account.
9325

9326
    """
9327
    instance = self.instance
9328
    secondary_node = instance.secondary_nodes[0]
9329

    
9330
    if self.iallocator_name is None:
9331
      remote_node = self.remote_node
9332
    else:
9333
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9334
                                       instance.name, instance.secondary_nodes)
9335

    
9336
    if remote_node is None:
9337
      self.remote_node_info = None
9338
    else:
9339
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9340
             "Remote node '%s' is not locked" % remote_node
9341

    
9342
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9343
      assert self.remote_node_info is not None, \
9344
        "Cannot retrieve locked node %s" % remote_node
9345

    
9346
    if remote_node == self.instance.primary_node:
9347
      raise errors.OpPrereqError("The specified node is the primary node of"
9348
                                 " the instance", errors.ECODE_INVAL)
9349

    
9350
    if remote_node == secondary_node:
9351
      raise errors.OpPrereqError("The specified node is already the"
9352
                                 " secondary node of the instance",
9353
                                 errors.ECODE_INVAL)
9354

    
9355
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9356
                                    constants.REPLACE_DISK_CHG):
9357
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9358
                                 errors.ECODE_INVAL)
9359

    
9360
    if self.mode == constants.REPLACE_DISK_AUTO:
9361
      if not self._CheckDisksActivated(instance):
9362
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9363
                                   " first" % self.instance_name,
9364
                                   errors.ECODE_STATE)
9365
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9366
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9367

    
9368
      if faulty_primary and faulty_secondary:
9369
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9370
                                   " one node and can not be repaired"
9371
                                   " automatically" % self.instance_name,
9372
                                   errors.ECODE_STATE)
9373

    
9374
      if faulty_primary:
9375
        self.disks = faulty_primary
9376
        self.target_node = instance.primary_node
9377
        self.other_node = secondary_node
9378
        check_nodes = [self.target_node, self.other_node]
9379
      elif faulty_secondary:
9380
        self.disks = faulty_secondary
9381
        self.target_node = secondary_node
9382
        self.other_node = instance.primary_node
9383
        check_nodes = [self.target_node, self.other_node]
9384
      else:
9385
        self.disks = []
9386
        check_nodes = []
9387

    
9388
    else:
9389
      # Non-automatic modes
9390
      if self.mode == constants.REPLACE_DISK_PRI:
9391
        self.target_node = instance.primary_node
9392
        self.other_node = secondary_node
9393
        check_nodes = [self.target_node, self.other_node]
9394

    
9395
      elif self.mode == constants.REPLACE_DISK_SEC:
9396
        self.target_node = secondary_node
9397
        self.other_node = instance.primary_node
9398
        check_nodes = [self.target_node, self.other_node]
9399

    
9400
      elif self.mode == constants.REPLACE_DISK_CHG:
9401
        self.new_node = remote_node
9402
        self.other_node = instance.primary_node
9403
        self.target_node = secondary_node
9404
        check_nodes = [self.new_node, self.other_node]
9405

    
9406
        _CheckNodeNotDrained(self.lu, remote_node)
9407
        _CheckNodeVmCapable(self.lu, remote_node)
9408

    
9409
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9410
        assert old_node_info is not None
9411
        if old_node_info.offline and not self.early_release:
9412
          # doesn't make sense to delay the release
9413
          self.early_release = True
9414
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9415
                          " early-release mode", secondary_node)
9416

    
9417
      else:
9418
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9419
                                     self.mode)
9420

    
9421
      # If not specified all disks should be replaced
9422
      if not self.disks:
9423
        self.disks = range(len(self.instance.disks))
9424

    
9425
    for node in check_nodes:
9426
      _CheckNodeOnline(self.lu, node)
9427

    
9428
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9429
                                                          self.other_node,
9430
                                                          self.target_node]
9431
                              if node_name is not None)
9432

    
9433
    # Release unneeded node locks
9434
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9435

    
9436
    # Release any owned node group
9437
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9438
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9439

    
9440
    # Check whether disks are valid
9441
    for disk_idx in self.disks:
9442
      instance.FindDisk(disk_idx)
9443

    
9444
    # Get secondary node IP addresses
9445
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9446
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9447

    
9448
  def Exec(self, feedback_fn):
9449
    """Execute disk replacement.
9450

9451
    This dispatches the disk replacement to the appropriate handler.
9452

9453
    """
9454
    if self.delay_iallocator:
9455
      self._CheckPrereq2()
9456

    
9457
    if __debug__:
9458
      # Verify owned locks before starting operation
9459
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9460
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9461
          ("Incorrect node locks, owning %s, expected %s" %
9462
           (owned_nodes, self.node_secondary_ip.keys()))
9463

    
9464
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9465
      assert list(owned_instances) == [self.instance_name], \
9466
          "Instance '%s' not locked" % self.instance_name
9467

    
9468
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9469
          "Should not own any node group lock at this point"
9470

    
9471
    if not self.disks:
9472
      feedback_fn("No disks need replacement")
9473
      return
9474

    
9475
    feedback_fn("Replacing disk(s) %s for %s" %
9476
                (utils.CommaJoin(self.disks), self.instance.name))
9477

    
9478
    activate_disks = (not self.instance.admin_up)
9479

    
9480
    # Activate the instance disks if we're replacing them on a down instance
9481
    if activate_disks:
9482
      _StartInstanceDisks(self.lu, self.instance, True)
9483

    
9484
    try:
9485
      # Should we replace the secondary node?
9486
      if self.new_node is not None:
9487
        fn = self._ExecDrbd8Secondary
9488
      else:
9489
        fn = self._ExecDrbd8DiskOnly
9490

    
9491
      result = fn(feedback_fn)
9492
    finally:
9493
      # Deactivate the instance disks if we're replacing them on a
9494
      # down instance
9495
      if activate_disks:
9496
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9497

    
9498
    if __debug__:
9499
      # Verify owned locks
9500
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9501
      nodes = frozenset(self.node_secondary_ip)
9502
      assert ((self.early_release and not owned_nodes) or
9503
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9504
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9505
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9506

    
9507
    return result
9508

    
9509
  def _CheckVolumeGroup(self, nodes):
9510
    self.lu.LogInfo("Checking volume groups")
9511

    
9512
    vgname = self.cfg.GetVGName()
9513

    
9514
    # Make sure volume group exists on all involved nodes
9515
    results = self.rpc.call_vg_list(nodes)
9516
    if not results:
9517
      raise errors.OpExecError("Can't list volume groups on the nodes")
9518

    
9519
    for node in nodes:
9520
      res = results[node]
9521
      res.Raise("Error checking node %s" % node)
9522
      if vgname not in res.payload:
9523
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9524
                                 (vgname, node))
9525

    
9526
  def _CheckDisksExistence(self, nodes):
9527
    # Check disk existence
9528
    for idx, dev in enumerate(self.instance.disks):
9529
      if idx not in self.disks:
9530
        continue
9531

    
9532
      for node in nodes:
9533
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9534
        self.cfg.SetDiskID(dev, node)
9535

    
9536
        result = self.rpc.call_blockdev_find(node, dev)
9537

    
9538
        msg = result.fail_msg
9539
        if msg or not result.payload:
9540
          if not msg:
9541
            msg = "disk not found"
9542
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9543
                                   (idx, node, msg))
9544

    
9545
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9546
    for idx, dev in enumerate(self.instance.disks):
9547
      if idx not in self.disks:
9548
        continue
9549

    
9550
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9551
                      (idx, node_name))
9552

    
9553
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9554
                                   ldisk=ldisk):
9555
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9556
                                 " replace disks for instance %s" %
9557
                                 (node_name, self.instance.name))
9558

    
9559
  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

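      # build one data LV (same size as the disk) and one small fixed-size
      # meta LV per disk, each in the same volume group as the corresponding
      # existing child LV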
      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9626
    """Replace a disk on the primary or secondary for DRBD 8.
9627

9628
    The algorithm for replace is quite complicated:
9629

9630
      1. for each disk to be replaced:
9631

9632
        1. create new LVs on the target node with unique names
9633
        1. detach old LVs from the drbd device
9634
        1. rename old LVs to name_replaced.<time_t>
9635
        1. rename new LVs to old LVs
9636
        1. attach the new LVs (with the old names now) to the drbd device
9637

9638
      1. wait for sync across all devices
9639

9640
      1. for each modified disk:
9641

9642
        1. remove old LVs (which have the name name_replaced.<time_t>)
9643

9644
    Failures are not very well handled.
9645

9646
    """
9647
    steps_total = 6
9648

    
9649
    # Step: check device activation
9650
    self.lu.LogStep(1, steps_total, "Check device existence")
9651
    self._CheckDisksExistence([self.other_node, self.target_node])
9652
    self._CheckVolumeGroup([self.target_node, self.other_node])
9653

    
9654
    # Step: check other node consistency
9655
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9656
    self._CheckDisksConsistency(self.other_node,
9657
                                self.other_node == self.instance.primary_node,
9658
                                False)
9659

    
9660
    # Step: create new storage
9661
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9662
    iv_names = self._CreateNewStorage(self.target_node)
9663

    
9664
    # Step: for each lv, detach+rename*2+attach
9665
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9666
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9667
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9668

    
9669
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9670
                                                     old_lvs)
9671
      result.Raise("Can't detach drbd from local storage on node"
9672
                   " %s for device %s" % (self.target_node, dev.iv_name))
9673
      #dev.children = []
9674
      #cfg.Update(instance)
9675

    
9676
      # ok, we created the new LVs, so now we know we have the needed
9677
      # storage; as such, we proceed on the target node to rename
9678
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9679
      # using the assumption that logical_id == physical_id (which in
9680
      # turn is the unique_id on that node)
9681

    
9682
      # FIXME(iustin): use a better name for the replaced LVs
9683
      temp_suffix = int(time.time())
9684
      ren_fn = lambda d, suff: (d.physical_id[0],
9685
                                d.physical_id[1] + "_replaced-%s" % suff)
9686

    
9687
      # Build the rename list based on what LVs exist on the node
9688
      rename_old_to_new = []
9689
      for to_ren in old_lvs:
9690
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9691
        if not result.fail_msg and result.payload:
9692
          # device exists
9693
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9694

    
9695
      self.lu.LogInfo("Renaming the old LVs on the target node")
9696
      result = self.rpc.call_blockdev_rename(self.target_node,
9697
                                             rename_old_to_new)
9698
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9699

    
9700
      # Now we rename the new LVs to the old LVs
9701
      self.lu.LogInfo("Renaming the new LVs on the target node")
9702
      rename_new_to_old = [(new, old.physical_id)
9703
                           for old, new in zip(old_lvs, new_lvs)]
9704
      result = self.rpc.call_blockdev_rename(self.target_node,
9705
                                             rename_new_to_old)
9706
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9707

    
9708
      # Intermediate steps of in memory modifications
9709
      for old, new in zip(old_lvs, new_lvs):
9710
        new.logical_id = old.logical_id
9711
        self.cfg.SetDiskID(new, self.target_node)
9712

    
9713
      # We need to modify old_lvs so that removal later removes the
9714
      # right LVs, not the newly added ones; note that old_lvs is a
9715
      # copy here
9716
      for disk in old_lvs:
9717
        disk.logical_id = ren_fn(disk, temp_suffix)
9718
        self.cfg.SetDiskID(disk, self.target_node)
9719

    
9720
      # Now that the new lvs have the old name, we can add them to the device
9721
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9722
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9723
                                                  new_lvs)
9724
      msg = result.fail_msg
9725
      if msg:
9726
        for new_lv in new_lvs:
9727
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9728
                                               new_lv).fail_msg
9729
          if msg2:
9730
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9731
                               hint=("cleanup manually the unused logical"
9732
                                     "volumes"))
9733
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9734

    
9735
    cstep = 5
9736
    if self.early_release:
9737
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9738
      cstep += 1
9739
      self._RemoveOldStorage(self.target_node, iv_names)
9740
      # WARNING: we release both node locks here, do not do other RPCs
9741
      # than WaitForSync to the primary node
9742
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9743
                    names=[self.target_node, self.other_node])
9744

    
9745
    # Wait for sync
9746
    # This can fail as the old devices are degraded and _WaitForSync
9747
    # does a combined result over all disks, so we don't check its return value
9748
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9749
    cstep += 1
9750
    _WaitForSync(self.lu, self.instance)
9751

    
9752
    # Check all devices manually
9753
    self._CheckDevices(self.instance.primary_node, iv_names)
9754

    
9755
    # Step: remove old storage
9756
    if not self.early_release:
9757
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9758
      cstep += 1
9759
      self._RemoveOldStorage(self.target_node, iv_names)
9760

    
9761
  def _ExecDrbd8Secondary(self, feedback_fn):
9762
    """Replace the secondary node for DRBD 8.
9763

9764
    The algorithm for replace is quite complicated:
9765
      - for all disks of the instance:
9766
        - create new LVs on the new node with same names
9767
        - shutdown the drbd device on the old secondary
9768
        - disconnect the drbd network on the primary
9769
        - create the drbd device on the new secondary
9770
        - network attach the drbd on the primary, using an artifice:
9771
          the drbd code for Attach() will connect to the network if it
9772
          finds a device which is connected to the good local disks but
9773
          not network enabled
9774
      - wait for sync across all devices
9775
      - remove all disks from the old secondary
9776

9777
    Failures are not very well handled.
9778

9779
    """
9780
    steps_total = 6
9781

    
9782
    # Step: check device activation
9783
    self.lu.LogStep(1, steps_total, "Check device existence")
9784
    self._CheckDisksExistence([self.instance.primary_node])
9785
    self._CheckVolumeGroup([self.instance.primary_node])
9786

    
9787
    # Step: check other node consistency
9788
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9789
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9790

    
9791
    # Step: create new storage
9792
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9793
    for idx, dev in enumerate(self.instance.disks):
9794
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9795
                      (self.new_node, idx))
9796
      # we pass force_create=True to force LVM creation
9797
      for new_lv in dev.children:
9798
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9799
                        _GetInstanceInfoText(self.instance), False)
9800

    
9801
    # Step 4: drbd minors and drbd setup changes
9802
    # after this, we must manually remove the drbd minors on both the
9803
    # error and the success paths
9804
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9805
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9806
                                         for dev in self.instance.disks],
9807
                                        self.instance.name)
9808
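    # one new DRBD minor is requested on the new node for every disk of the
    # instance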
    logging.debug("Allocated minors %r", minors)
9809

    
9810
    iv_names = {}
9811
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9812
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9813
                      (self.new_node, idx))
9814
      # create new devices on new_node; note that we create two IDs:
9815
      # one without port, so the drbd will be activated without
9816
      # networking information on the new node at this stage, and one
9817
      # with network, for the latter activation in step 4
9818
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9819
      if self.instance.primary_node == o_node1:
9820
        p_minor = o_minor1
9821
      else:
9822
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9823
        p_minor = o_minor2
9824

    
9825
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9826
                      p_minor, new_minor, o_secret)
9827
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9828
                    p_minor, new_minor, o_secret)
9829

    
9830
      iv_names[idx] = (dev, dev.children, new_net_id)
9831
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9832
                    new_net_id)
9833
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9834
                              logical_id=new_alone_id,
9835
                              children=dev.children,
9836
                              size=dev.size)
9837
      try:
9838
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9839
                              _GetInstanceInfoText(self.instance), False)
9840
      except errors.GenericError:
9841
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9842
        raise
9843

    
9844
    # We have new devices, shutdown the drbd on the old secondary
9845
    for idx, dev in enumerate(self.instance.disks):
9846
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9847
      self.cfg.SetDiskID(dev, self.target_node)
9848
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9849
      if msg:
9850
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9851
                           "node: %s" % (idx, msg),
9852
                           hint=("Please cleanup this device manually as"
9853
                                 " soon as possible"))
9854

    
9855
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9856
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9857
                                               self.node_secondary_ip,
9858
                                               self.instance.disks)\
9859
                                              [self.instance.primary_node]
9860

    
9861
    msg = result.fail_msg
9862
    if msg:
9863
      # detaches didn't succeed (unlikely)
9864
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9865
      raise errors.OpExecError("Can't detach the disks from the network on"
9866
                               " old node: %s" % (msg,))
9867

    
9868
    # if we managed to detach at least one, we update all the disks of
9869
    # the instance to point to the new secondary
9870
    self.lu.LogInfo("Updating instance configuration")
9871
    for dev, _, new_logical_id in iv_names.itervalues():
9872
      dev.logical_id = new_logical_id
9873
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9874

    
9875
    self.cfg.Update(self.instance, feedback_fn)
9876

    
9877
    # and now perform the drbd attach
9878
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9879
                    " (standalone => connected)")
9880
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9881
                                            self.new_node],
9882
                                           self.node_secondary_ip,
9883
                                           self.instance.disks,
9884
                                           self.instance.name,
9885
                                           False)
9886
    for to_node, to_result in result.items():
9887
      msg = to_result.fail_msg
9888
      if msg:
9889
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9890
                           to_node, msg,
9891
                           hint=("please do a gnt-instance info to see the"
9892
                                 " status of disks"))
9893
    cstep = 5
9894
    if self.early_release:
9895
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9896
      cstep += 1
9897
      self._RemoveOldStorage(self.target_node, iv_names)
9898
      # WARNING: we release all node locks here, do not do other RPCs
9899
      # than WaitForSync to the primary node
9900
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9901
                    names=[self.instance.primary_node,
9902
                           self.target_node,
9903
                           self.new_node])
9904

    
9905
    # Wait for sync
9906
    # This can fail as the old devices are degraded and _WaitForSync
9907
    # does a combined result over all disks, so we don't check its return value
9908
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9909
    cstep += 1
9910
    _WaitForSync(self.lu, self.instance)
9911

    
9912
    # Check all devices manually
9913
    self._CheckDevices(self.instance.primary_node, iv_names)
9914

    
9915
    # Step: remove old storage
9916
    if not self.early_release:
9917
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9918
      self._RemoveOldStorage(self.target_node, iv_names)
9919

    
9920

    
9921
class LURepairNodeStorage(NoHooksLU):
9922
  """Repairs the volume group on a node.
9923

9924
  """
9925
  REQ_BGL = False
9926

    
9927
  def CheckArguments(self):
9928
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9929

    
9930
    storage_type = self.op.storage_type
9931

    
9932
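    # only storage types whose backend supports the "fix consistency"
    # operation can be repaired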
    if (constants.SO_FIX_CONSISTENCY not in
9933
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9934
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9935
                                 " repaired" % storage_type,
9936
                                 errors.ECODE_INVAL)
9937

    
9938
  def ExpandNames(self):
9939
    self.needed_locks = {
9940
      locking.LEVEL_NODE: [self.op.node_name],
9941
      }
9942

    
9943
  def _CheckFaultyDisks(self, instance, node_name):
9944
    """Ensure faulty disks abort the opcode or at least warn."""
9945
    try:
9946
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9947
                                  node_name, True):
9948
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9949
                                   " node '%s'" % (instance.name, node_name),
9950
                                   errors.ECODE_STATE)
9951
    except errors.OpPrereqError, err:
9952
      if self.op.ignore_consistency:
9953
        self.proc.LogWarning(str(err.args[0]))
9954
      else:
9955
        raise
9956

    
9957
  def CheckPrereq(self):
9958
    """Check prerequisites.
9959

9960
    """
9961
    # Check whether any instance on this node has faulty disks
9962
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9963
      if not inst.admin_up:
9964
        continue
9965
      check_nodes = set(inst.all_nodes)
9966
      check_nodes.discard(self.op.node_name)
9967
      for inst_node_name in check_nodes:
9968
        self._CheckFaultyDisks(inst, inst_node_name)
9969

    
9970
  def Exec(self, feedback_fn):
9971
    feedback_fn("Repairing storage unit '%s' on %s ..." %
9972
                (self.op.name, self.op.node_name))
9973

    
9974
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9975
    result = self.rpc.call_storage_execute(self.op.node_name,
9976
                                           self.op.storage_type, st_args,
9977
                                           self.op.name,
9978
                                           constants.SO_FIX_CONSISTENCY)
9979
    result.Raise("Failed to repair storage unit '%s' on %s" %
9980
                 (self.op.name, self.op.node_name))
9981

    
9982

    
9983
class LUNodeEvacuate(NoHooksLU):
9984
  """Evacuates instances off a list of nodes.
9985

9986
  """
9987
  REQ_BGL = False
9988

    
9989
  def CheckArguments(self):
9990
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9991

    
9992
  def ExpandNames(self):
9993
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9994

    
9995
    if self.op.remote_node is not None:
9996
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9997
      assert self.op.remote_node
9998

    
9999
      if self.op.remote_node == self.op.node_name:
10000
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10001
                                   " secondary node", errors.ECODE_INVAL)
10002

    
10003
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10004
        raise errors.OpPrereqError("Without the use of an iallocator only"
10005
                                   " secondary instances can be evacuated",
10006
                                   errors.ECODE_INVAL)
10007

    
10008
    # Declare locks
10009
    self.share_locks = _ShareAll()
10010
    self.needed_locks = {
10011
      locking.LEVEL_INSTANCE: [],
10012
      locking.LEVEL_NODEGROUP: [],
10013
      locking.LEVEL_NODE: [],
10014
      }
10015

    
10016
    if self.op.remote_node is None:
10017
      # Iallocator will choose any node(s) in the same group
10018
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10019
    else:
10020
      group_nodes = frozenset([self.op.remote_node])
10021

    
10022
    # Determine nodes to be locked
10023
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10024

    
10025
  def _DetermineInstances(self):
10026
    """Builds list of instances to operate on.
10027

10028
    """
10029
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10030

    
10031
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10032
      # Primary instances only
10033
      inst_fn = _GetNodePrimaryInstances
10034
      assert self.op.remote_node is None, \
10035
        "Evacuating primary instances requires iallocator"
10036
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10037
      # Secondary instances only
10038
      inst_fn = _GetNodeSecondaryInstances
10039
    else:
10040
      # All instances
10041
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10042
      inst_fn = _GetNodeInstances
10043

    
10044
    return inst_fn(self.cfg, self.op.node_name)
10045

    
10046
  def DeclareLocks(self, level):
10047
    if level == locking.LEVEL_INSTANCE:
10048
      # Lock instances optimistically, needs verification once node and group
10049
      # locks have been acquired
10050
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10051
        set(i.name for i in self._DetermineInstances())
10052

    
10053
    elif level == locking.LEVEL_NODEGROUP:
10054
      # Lock node groups optimistically, needs verification once nodes have
10055
      # been acquired
10056
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10057
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10058

    
10059
    elif level == locking.LEVEL_NODE:
10060
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10061

    
10062
  def CheckPrereq(self):
10063
    # Verify locks
10064
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10065
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10066
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10067

    
10068
    assert owned_nodes == self.lock_nodes
10069

    
10070
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10071
    if owned_groups != wanted_groups:
10072
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10073
                               " current groups are '%s', used to be '%s'" %
10074
                               (utils.CommaJoin(wanted_groups),
10075
                                utils.CommaJoin(owned_groups)))
10076

    
10077
    # Determine affected instances
10078
    self.instances = self._DetermineInstances()
10079
    self.instance_names = [i.name for i in self.instances]
10080

    
10081
    if set(self.instance_names) != owned_instances:
10082
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10083
                               " were acquired, current instances are '%s',"
10084
                               " used to be '%s'" %
10085
                               (self.op.node_name,
10086
                                utils.CommaJoin(self.instance_names),
10087
                                utils.CommaJoin(owned_instances)))
10088

    
10089
    if self.instance_names:
10090
      self.LogInfo("Evacuating instances from node '%s': %s",
10091
                   self.op.node_name,
10092
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10093
    else:
10094
      self.LogInfo("No instances to evacuate from node '%s'",
10095
                   self.op.node_name)
10096

    
10097
    if self.op.remote_node is not None:
10098
      for i in self.instances:
10099
        if i.primary_node == self.op.remote_node:
10100
          raise errors.OpPrereqError("Node %s is the primary node of"
10101
                                     " instance %s, cannot use it as"
10102
                                     " secondary" %
10103
                                     (self.op.remote_node, i.name),
10104
                                     errors.ECODE_INVAL)
10105

    
10106
  def Exec(self, feedback_fn):
10107
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10108

    
10109
    if not self.instance_names:
10110
      # No instances to evacuate
10111
      jobs = []
10112

    
10113
    elif self.op.iallocator is not None:
10114
      # TODO: Implement relocation to other group
10115
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10116
                       evac_mode=self.op.mode,
10117
                       instances=list(self.instance_names))
10118

    
10119
      ial.Run(self.op.iallocator)
10120

    
10121
      if not ial.success:
10122
        raise errors.OpPrereqError("Can't compute node evacuation using"
10123
                                   " iallocator '%s': %s" %
10124
                                   (self.op.iallocator, ial.info),
10125
                                   errors.ECODE_NORES)
10126

    
10127
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10128

    
10129
    elif self.op.remote_node is not None:
10130
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10131
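      # without an iallocator, build one replace-disks job per instance,
      # moving its secondary to the requested remote node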
      jobs = [
10132
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10133
                                        remote_node=self.op.remote_node,
10134
                                        disks=[],
10135
                                        mode=constants.REPLACE_DISK_CHG,
10136
                                        early_release=self.op.early_release)]
10137
        for instance_name in self.instance_names
10138
        ]
10139

    
10140
    else:
10141
      raise errors.ProgrammerError("No iallocator or remote node")
10142

    
10143
    return ResultWithJobs(jobs)
10144

    
10145

    
10146
def _SetOpEarlyRelease(early_release, op):
10147
  """Sets C{early_release} flag on opcodes if available.
10148

10149
  """
10150
  try:
10151
    op.early_release = early_release
10152
  except AttributeError:
10153
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10154

    
10155
  return op
10156

    
10157

    
10158
def _NodeEvacDest(use_nodes, group, nodes):
10159
  """Returns group or nodes depending on caller's choice.
10160

10161
  """
10162
  if use_nodes:
10163
    return utils.CommaJoin(nodes)
10164
  else:
10165
    return group
10166

    
10167

    
10168
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10169
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10170

10171
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10172
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10173

10174
  @type lu: L{LogicalUnit}
10175
  @param lu: Logical unit instance
10176
  @type alloc_result: tuple/list
10177
  @param alloc_result: Result from iallocator
10178
  @type early_release: bool
10179
  @param early_release: Whether to release locks early if possible
10180
  @type use_nodes: bool
10181
  @param use_nodes: Whether to display node names instead of groups
10182

10183
  """
10184
  (moved, failed, jobs) = alloc_result
10185

    
10186
  if failed:
10187
    lu.LogWarning("Unable to evacuate instances %s",
10188
                  utils.CommaJoin("%s (%s)" % (name, reason)
10189
                                  for (name, reason) in failed))
10190

    
10191
  if moved:
10192
    lu.LogInfo("Instances to be moved: %s",
10193
               utils.CommaJoin("%s (to %s)" %
10194
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10195
                               for (name, group, nodes) in moved))
10196

    
10197
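  # re-create opcode objects from their serialized form and propagate the
  # early_release flag to those opcodes that support it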
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10198
              map(opcodes.OpCode.LoadOpCode, ops))
10199
          for ops in jobs]
10200

    
10201

    
10202
class LUInstanceGrowDisk(LogicalUnit):
10203
  """Grow a disk of an instance.
10204

10205
  """
10206
  HPATH = "disk-grow"
10207
  HTYPE = constants.HTYPE_INSTANCE
10208
  REQ_BGL = False
10209

    
10210
  def ExpandNames(self):
10211
    self._ExpandAndLockInstance()
10212
    self.needed_locks[locking.LEVEL_NODE] = []
10213
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10214

    
10215
  def DeclareLocks(self, level):
10216
    if level == locking.LEVEL_NODE:
10217
      self._LockInstancesNodes()
10218

    
10219
  def BuildHooksEnv(self):
10220
    """Build hooks env.
10221

10222
    This runs on the master, the primary and all the secondaries.
10223

10224
    """
10225
    env = {
10226
      "DISK": self.op.disk,
10227
      "AMOUNT": self.op.amount,
10228
      }
10229
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10230
    return env
10231

    
10232
  def BuildHooksNodes(self):
10233
    """Build hooks nodes.
10234

10235
    """
10236
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10237
    return (nl, nl)
10238

    
10239
  def CheckPrereq(self):
10240
    """Check prerequisites.
10241

10242
    This checks that the instance is in the cluster.
10243

10244
    """
10245
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10246
    assert instance is not None, \
10247
      "Cannot retrieve locked instance %s" % self.op.instance_name
10248
    nodenames = list(instance.all_nodes)
10249
    for node in nodenames:
10250
      _CheckNodeOnline(self, node)
10251

    
10252
    self.instance = instance
10253

    
10254
    if instance.disk_template not in constants.DTS_GROWABLE:
10255
      raise errors.OpPrereqError("Instance's disk layout does not support"
10256
                                 " growing", errors.ECODE_INVAL)
10257

    
10258
    self.disk = instance.FindDisk(self.op.disk)
10259

    
10260
    if instance.disk_template not in (constants.DT_FILE,
10261
                                      constants.DT_SHARED_FILE):
10262
      # TODO: check the free disk space for file, when that feature is
10263
      # supported
10264
      _CheckNodesFreeDiskPerVG(self, nodenames,
10265
                               self.disk.ComputeGrowth(self.op.amount))
10266

    
10267
  def Exec(self, feedback_fn):
10268
    """Execute disk grow.
10269

10270
    """
10271
    instance = self.instance
10272
    disk = self.disk
10273

    
10274
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10275
    if not disks_ok:
10276
      raise errors.OpExecError("Cannot activate block device to grow")
10277

    
10278
    # First run all grow ops in dry-run mode
10279
    for node in instance.all_nodes:
10280
      self.cfg.SetDiskID(disk, node)
10281
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10282
      result.Raise("Grow request failed to node %s" % node)
10283

    
10284
    # We know that (as far as we can test) operations across different
10285
    # nodes will succeed, time to run it for real
10286
    for node in instance.all_nodes:
10287
      self.cfg.SetDiskID(disk, node)
10288
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10289
      result.Raise("Grow request failed to node %s" % node)
10290

    
10291
      # TODO: Rewrite code to work properly
10292
      # DRBD goes into sync mode for a short amount of time after executing the
10293
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10294
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10295
      # time is a work-around.
10296
      time.sleep(5)
10297

    
10298
    disk.RecordGrow(self.op.amount)
10299
    self.cfg.Update(instance, feedback_fn)
10300
    if self.op.wait_for_sync:
10301
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10302
      if disk_abort:
10303
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10304
                             " status; please check the instance")
10305
      if not instance.admin_up:
10306
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10307
    elif not instance.admin_up:
10308
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10309
                           " not supposed to be running because no wait for"
10310
                           " sync mode was requested")
10311

    
10312

    
10313
class LUInstanceQueryData(NoHooksLU):
10314
  """Query runtime instance data.
10315

10316
  """
10317
  REQ_BGL = False
10318

    
10319
  def ExpandNames(self):
10320
    self.needed_locks = {}
10321

    
10322
    # Use locking if requested or when non-static information is wanted
10323
    if not (self.op.static or self.op.use_locking):
10324
      self.LogWarning("Non-static data requested, locks need to be acquired")
10325
      self.op.use_locking = True
10326

    
10327
    if self.op.instances or not self.op.use_locking:
10328
      # Expand instance names right here
10329
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10330
    else:
10331
      # Will use acquired locks
10332
      self.wanted_names = None
10333

    
10334
    if self.op.use_locking:
10335
      self.share_locks = _ShareAll()
10336

    
10337
      if self.wanted_names is None:
10338
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10339
      else:
10340
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10341

    
10342
      self.needed_locks[locking.LEVEL_NODE] = []
10343
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10344

    
10345
  def DeclareLocks(self, level):
10346
    if self.op.use_locking and level == locking.LEVEL_NODE:
10347
      self._LockInstancesNodes()
10348

    
10349
  def CheckPrereq(self):
10350
    """Check prerequisites.
10351

10352
    This only checks the optional instance list against the existing names.
10353

10354
    """
10355
    if self.wanted_names is None:
10356
      assert self.op.use_locking, "Locking was not used"
10357
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10358

    
10359
    self.wanted_instances = \
10360
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10361

    
10362
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
10363
    """Returns the status of a block device
10364

10365
    """
10366
    if self.op.static or not node:
10367
      return None
10368

    
10369
    self.cfg.SetDiskID(dev, node)
10370

    
10371
    result = self.rpc.call_blockdev_find(node, dev)
10372
    if result.offline:
10373
      return None
10374

    
10375
    result.Raise("Can't compute disk status for %s" % instance_name)
10376

    
10377
    status = result.payload
10378
    if status is None:
10379
      return None
10380

    
10381
    return (status.dev_path, status.major, status.minor,
10382
            status.sync_percent, status.estimated_time,
10383
            status.is_degraded, status.ldisk_status)
10384

    
10385
  def _ComputeDiskStatus(self, instance, snode, dev):
10386
    """Compute block device status.
10387

10388
    """
10389
    if dev.dev_type in constants.LDS_DRBD:
10390
      # we change the snode then (otherwise we use the one passed in)
10391
      if dev.logical_id[0] == instance.primary_node:
10392
        snode = dev.logical_id[1]
10393
      else:
10394
        snode = dev.logical_id[0]
10395

    
10396
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10397
                                              instance.name, dev)
10398
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10399

    
10400
    if dev.children:
10401
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10402
                                        instance, snode),
10403
                         dev.children)
10404
    else:
10405
      dev_children = []
10406

    
10407
    return {
10408
      "iv_name": dev.iv_name,
10409
      "dev_type": dev.dev_type,
10410
      "logical_id": dev.logical_id,
10411
      "physical_id": dev.physical_id,
10412
      "pstatus": dev_pstatus,
10413
      "sstatus": dev_sstatus,
10414
      "children": dev_children,
10415
      "mode": dev.mode,
10416
      "size": dev.size,
10417
      }
10418

    
10419
  def Exec(self, feedback_fn):
10420
    """Gather and return data"""
10421
    result = {}
10422

    
10423
    cluster = self.cfg.GetClusterInfo()
10424

    
10425
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10426
                                          for i in self.wanted_instances)
10427
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10428
      if self.op.static or pnode.offline:
10429
        remote_state = None
10430
        if pnode.offline:
10431
          self.LogWarning("Primary node %s is marked offline, returning static"
10432
                          " information only for instance %s" %
10433
                          (pnode.name, instance.name))
10434
      else:
10435
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10436
                                                  instance.name,
10437
                                                  instance.hypervisor)
10438
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10439
        remote_info = remote_info.payload
10440
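        # the instance is considered running if the hypervisor returned
        # state information for it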
        if remote_info and "state" in remote_info:
10441
          remote_state = "up"
10442
        else:
10443
          remote_state = "down"
10444

    
10445
      if instance.admin_up:
10446
        config_state = "up"
10447
      else:
10448
        config_state = "down"
10449

    
10450
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10451
                  instance.disks)
10452

    
10453
      result[instance.name] = {
10454
        "name": instance.name,
10455
        "config_state": config_state,
10456
        "run_state": remote_state,
10457
        "pnode": instance.primary_node,
10458
        "snodes": instance.secondary_nodes,
10459
        "os": instance.os,
10460
        # this happens to be the same format used for hooks
10461
        "nics": _NICListToTuple(self, instance.nics),
10462
        "disk_template": instance.disk_template,
10463
        "disks": disks,
10464
        "hypervisor": instance.hypervisor,
10465
        "network_port": instance.network_port,
10466
        "hv_instance": instance.hvparams,
10467
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10468
        "be_instance": instance.beparams,
10469
        "be_actual": cluster.FillBE(instance),
10470
        "os_instance": instance.osparams,
10471
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10472
        "serial_no": instance.serial_no,
10473
        "mtime": instance.mtime,
10474
        "ctime": instance.ctime,
10475
        "uuid": instance.uuid,
10476
        }
10477

    
10478
    return result
10479

    
10480

    
10481
class LUInstanceSetParams(LogicalUnit):
10482
  """Modifies an instances's parameters.
10483

10484
  """
10485
  HPATH = "instance-modify"
10486
  HTYPE = constants.HTYPE_INSTANCE
10487
  REQ_BGL = False
10488

    
10489
  def CheckArguments(self):
10490
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10491
            self.op.hvparams or self.op.beparams or self.op.os_name):
10492
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10493

    
10494
    if self.op.hvparams:
10495
      _CheckGlobalHvParams(self.op.hvparams)
10496

    
10497
    # Disk validation
10498
    disk_addremove = 0
10499
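    # each entry is an (operation, parameters) pair, where the operation is
    # DDM_ADD, DDM_REMOVE or the index of an existing disk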
    for disk_op, disk_dict in self.op.disks:
10500
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10501
      if disk_op == constants.DDM_REMOVE:
10502
        disk_addremove += 1
10503
        continue
10504
      elif disk_op == constants.DDM_ADD:
10505
        disk_addremove += 1
10506
      else:
10507
        if not isinstance(disk_op, int):
10508
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10509
        if not isinstance(disk_dict, dict):
10510
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10511
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10512

    
10513
      if disk_op == constants.DDM_ADD:
10514
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10515
        if mode not in constants.DISK_ACCESS_SET:
10516
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10517
                                     errors.ECODE_INVAL)
10518
        size = disk_dict.get(constants.IDISK_SIZE, None)
10519
        if size is None:
10520
          raise errors.OpPrereqError("Required disk parameter size missing",
10521
                                     errors.ECODE_INVAL)
10522
        try:
10523
          size = int(size)
10524
        except (TypeError, ValueError), err:
10525
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10526
                                     str(err), errors.ECODE_INVAL)
10527
        disk_dict[constants.IDISK_SIZE] = size
10528
      else:
10529
        # modification of disk
10530
        if constants.IDISK_SIZE in disk_dict:
10531
          raise errors.OpPrereqError("Disk size change not possible, use"
10532
                                     " grow-disk", errors.ECODE_INVAL)
10533

    
10534
    if disk_addremove > 1:
10535
      raise errors.OpPrereqError("Only one disk add or remove operation"
10536
                                 " supported at a time", errors.ECODE_INVAL)
10537

    
10538
    if self.op.disks and self.op.disk_template is not None:
10539
      raise errors.OpPrereqError("Disk template conversion and other disk"
10540
                                 " changes not supported at the same time",
10541
                                 errors.ECODE_INVAL)
10542

    
10543
    if (self.op.disk_template and
10544
        self.op.disk_template in constants.DTS_INT_MIRROR and
10545
        self.op.remote_node is None):
10546
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10547
                                 " one requires specifying a secondary node",
10548
                                 errors.ECODE_INVAL)
10549

    
10550
    # NIC validation
10551
    nic_addremove = 0
10552
    for nic_op, nic_dict in self.op.nics:
10553
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10554
      if nic_op == constants.DDM_REMOVE:
10555
        nic_addremove += 1
10556
        continue
10557
      elif nic_op == constants.DDM_ADD:
10558
        nic_addremove += 1
10559
      else:
10560
        if not isinstance(nic_op, int):
10561
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10562
        if not isinstance(nic_dict, dict):
10563
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10564
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10565

    
10566
      # nic_dict should be a dict
10567
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10568
      if nic_ip is not None:
10569
        if nic_ip.lower() == constants.VALUE_NONE:
10570
          nic_dict[constants.INIC_IP] = None
10571
        else:
10572
          if not netutils.IPAddress.IsValid(nic_ip):
10573
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10574
                                       errors.ECODE_INVAL)
10575

    
10576
      nic_bridge = nic_dict.get("bridge", None)
10577
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10578
      if nic_bridge and nic_link:
10579
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10580
                                   " at the same time", errors.ECODE_INVAL)
10581
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10582
        nic_dict["bridge"] = None
10583
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10584
        nic_dict[constants.INIC_LINK] = None
10585

    
10586
      if nic_op == constants.DDM_ADD:
10587
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10588
        if nic_mac is None:
10589
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10590

    
10591
      if constants.INIC_MAC in nic_dict:
10592
        nic_mac = nic_dict[constants.INIC_MAC]
10593
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10594
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10595

    
10596
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10597
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10598
                                     " modifying an existing nic",
10599
                                     errors.ECODE_INVAL)
10600

    
10601
    if nic_addremove > 1:
10602
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10603
                                 " supported at a time", errors.ECODE_INVAL)
10604

    
10605
  def ExpandNames(self):
10606
    self._ExpandAndLockInstance()
10607
    self.needed_locks[locking.LEVEL_NODE] = []
10608
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10609

    
10610
  def DeclareLocks(self, level):
10611
    if level == locking.LEVEL_NODE:
10612
      self._LockInstancesNodes()
10613
      if self.op.disk_template and self.op.remote_node:
10614
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10615
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10616

    
10617
  def BuildHooksEnv(self):
10618
    """Build hooks env.
10619

10620
    This runs on the master, primary and secondaries.
10621

10622
    """
10623
    args = dict()
10624
    if constants.BE_MEMORY in self.be_new:
10625
      args["memory"] = self.be_new[constants.BE_MEMORY]
10626
    if constants.BE_VCPUS in self.be_new:
10627
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10628
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10629
    # information at all.
10630
    if self.op.nics:
10631
      args["nics"] = []
10632
      nic_override = dict(self.op.nics)
10633
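      # maps NIC index (or DDM_ADD/DDM_REMOVE) to the requested changes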
      for idx, nic in enumerate(self.instance.nics):
10634
        if idx in nic_override:
10635
          this_nic_override = nic_override[idx]
10636
        else:
10637
          this_nic_override = {}
10638
        if constants.INIC_IP in this_nic_override:
10639
          ip = this_nic_override[constants.INIC_IP]
10640
        else:
10641
          ip = nic.ip
10642
        if constants.INIC_MAC in this_nic_override:
10643
          mac = this_nic_override[constants.INIC_MAC]
10644
        else:
10645
          mac = nic.mac
10646
        if idx in self.nic_pnew:
10647
          nicparams = self.nic_pnew[idx]
10648
        else:
10649
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10650
        mode = nicparams[constants.NIC_MODE]
10651
        link = nicparams[constants.NIC_LINK]
10652
        args["nics"].append((ip, mac, mode, link))
10653
      if constants.DDM_ADD in nic_override:
10654
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10655
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10656
        nicparams = self.nic_pnew[constants.DDM_ADD]
10657
        mode = nicparams[constants.NIC_MODE]
10658
        link = nicparams[constants.NIC_LINK]
10659
        args["nics"].append((ip, mac, mode, link))
10660
      elif constants.DDM_REMOVE in nic_override:
10661
        del args["nics"][-1]
10662

    
10663
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10664
    if self.op.disk_template:
10665
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10666

    
10667
    return env
10668

    
10669
  def BuildHooksNodes(self):
10670
    """Build hooks nodes.
10671

10672
    """
10673
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10674
    return (nl, nl)
10675

    
10676
  def CheckPrereq(self):
10677
    """Check prerequisites.
10678

10679
    This only checks the instance list against the existing names.
10680

10681
    """
10682
    # checking the new params on the primary/secondary nodes
10683

    
10684
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10685
    cluster = self.cluster = self.cfg.GetClusterInfo()
10686
    assert self.instance is not None, \
10687
      "Cannot retrieve locked instance %s" % self.op.instance_name
10688
    pnode = instance.primary_node
10689
    nodelist = list(instance.all_nodes)
10690

    
10691
    # OS change
10692
    if self.op.os_name and not self.op.force:
10693
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10694
                      self.op.force_variant)
10695
      instance_os = self.op.os_name
10696
    else:
10697
      instance_os = instance.os
10698

    
10699
    if self.op.disk_template:
10700
      if instance.disk_template == self.op.disk_template:
10701
        raise errors.OpPrereqError("Instance already has disk template %s" %
10702
                                   instance.disk_template, errors.ECODE_INVAL)
10703

    
10704
      if (instance.disk_template,
10705
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10706
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10707
                                   " %s to %s" % (instance.disk_template,
10708
                                                  self.op.disk_template),
10709
                                   errors.ECODE_INVAL)
10710
      _CheckInstanceDown(self, instance, "cannot change disk template")
10711
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10712
        if self.op.remote_node == pnode:
10713
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10714
                                     " as the primary node of the instance" %
10715
                                     self.op.remote_node, errors.ECODE_STATE)
10716
        _CheckNodeOnline(self, self.op.remote_node)
10717
        _CheckNodeNotDrained(self, self.op.remote_node)
10718
        # FIXME: here we assume that the old instance type is DT_PLAIN
10719
        assert instance.disk_template == constants.DT_PLAIN
10720
        disks = [{constants.IDISK_SIZE: d.size,
10721
                  constants.IDISK_VG: d.logical_id[0]}
10722
                 for d in instance.disks]
10723
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10724
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10725

    
10726
    # hvparams processing
10727
    if self.op.hvparams:
10728
      hv_type = instance.hypervisor
10729
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10730
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10731
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10732

    
10733
      # local check
10734
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10735
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10736
      self.hv_new = hv_new # the new actual values
10737
      self.hv_inst = i_hvdict # the new dict (without defaults)
10738
    else:
10739
      self.hv_new = self.hv_inst = {}
10740

    
10741
    # beparams processing
10742
    if self.op.beparams:
10743
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10744
                                   use_none=True)
10745
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10746
      be_new = cluster.SimpleFillBE(i_bedict)
10747
      self.be_new = be_new # the new actual values
10748
      self.be_inst = i_bedict # the new dict (without defaults)
10749
    else:
10750
      self.be_new = self.be_inst = {}
10751
    be_old = cluster.FillBE(instance)
10752

    
10753
    # osparams processing
10754
    if self.op.osparams:
10755
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10756
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10757
      self.os_inst = i_osdict # the new dict (without defaults)
10758
    else:
10759
      self.os_inst = {}
10760

    
10761
    self.warn = []
10762

    
10763
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10764
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10765
      mem_check_list = [pnode]
10766
      if be_new[constants.BE_AUTO_BALANCE]:
10767
        # either we changed auto_balance to yes or it was from before
10768
        mem_check_list.extend(instance.secondary_nodes)
10769
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10770
                                                  instance.hypervisor)
10771
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10772
                                         instance.hypervisor)
10773
      pninfo = nodeinfo[pnode]
10774
      msg = pninfo.fail_msg
10775
      if msg:
10776
        # Assume the primary node is unreachable and go ahead
10777
        self.warn.append("Can't get info from primary node %s: %s" %
10778
                         (pnode, msg))
10779
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10780
        self.warn.append("Node data from primary node %s doesn't contain"
10781
                         " free memory information" % pnode)
10782
      elif instance_info.fail_msg:
10783
        self.warn.append("Can't get instance runtime information: %s" %
10784
                        instance_info.fail_msg)
10785
      else:
10786
        if instance_info.payload:
10787
          current_mem = int(instance_info.payload["memory"])
10788
        else:
10789
          # Assume instance not running
10790
          # (there is a slight race condition here, but it's not very probable,
10791
          # and we have no other way to check)
10792
          current_mem = 0
10793
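        # memory that would still be missing on the primary node to start the
        # instance with its new memory size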
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10794
                    pninfo.payload["memory_free"])
10795
        if miss_mem > 0:
10796
          raise errors.OpPrereqError("This change will prevent the instance"
10797
                                     " from starting, due to %d MB of memory"
10798
                                     " missing on its primary node" % miss_mem,
10799
                                     errors.ECODE_NORES)
10800

    
10801
      if be_new[constants.BE_AUTO_BALANCE]:
10802
        for node, nres in nodeinfo.items():
10803
          if node not in instance.secondary_nodes:
10804
            continue
10805
          nres.Raise("Can't get info from secondary node %s" % node,
10806
                     prereq=True, ecode=errors.ECODE_STATE)
10807
          if not isinstance(nres.payload.get("memory_free", None), int):
10808
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10809
                                       " memory information" % node,
10810
                                       errors.ECODE_STATE)
10811
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10812
            raise errors.OpPrereqError("This change will prevent the instance"
10813
                                       " from failover to its secondary node"
10814
                                       " %s, due to not enough memory" % node,
10815
                                       errors.ECODE_STATE)
10816

    
10817
    # NIC processing
10818
    self.nic_pnew = {}
10819
    self.nic_pinst = {}
10820
    for nic_op, nic_dict in self.op.nics:
10821
      if nic_op == constants.DDM_REMOVE:
10822
        if not instance.nics:
10823
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10824
                                     errors.ECODE_INVAL)
10825
        continue
10826
      if nic_op != constants.DDM_ADD:
10827
        # an existing nic
10828
        if not instance.nics:
10829
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10830
                                     " no NICs" % nic_op,
10831
                                     errors.ECODE_INVAL)
10832
        if nic_op < 0 or nic_op >= len(instance.nics):
10833
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10834
                                     " are 0 to %d" %
10835
                                     (nic_op, len(instance.nics) - 1),
10836
                                     errors.ECODE_INVAL)
10837
        old_nic_params = instance.nics[nic_op].nicparams
10838
        old_nic_ip = instance.nics[nic_op].ip
10839
      else:
10840
        old_nic_params = {}
10841
        old_nic_ip = None
10842

    
10843
      update_params_dict = dict([(key, nic_dict[key])
10844
                                 for key in constants.NICS_PARAMETERS
10845
                                 if key in nic_dict])
10846

    
10847
      if "bridge" in nic_dict:
10848
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10849

    
10850
      new_nic_params = _GetUpdatedParams(old_nic_params,
10851
                                         update_params_dict)
10852
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10853
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10854
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10855
      self.nic_pinst[nic_op] = new_nic_params
10856
      self.nic_pnew[nic_op] = new_filled_nic_params
10857
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10858

    
10859
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10860
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10861
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10862
        if msg:
10863
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10864
          if self.op.force:
10865
            self.warn.append(msg)
10866
          else:
10867
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10868
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10869
        if constants.INIC_IP in nic_dict:
10870
          nic_ip = nic_dict[constants.INIC_IP]
10871
        else:
10872
          nic_ip = old_nic_ip
10873
        if nic_ip is None:
10874
          raise errors.OpPrereqError("Cannot set the nic ip to None"
10875
                                     " on a routed nic", errors.ECODE_INVAL)
10876
      if constants.INIC_MAC in nic_dict:
10877
        nic_mac = nic_dict[constants.INIC_MAC]
10878
        if nic_mac is None:
10879
          raise errors.OpPrereqError("Cannot set the nic mac to None",
10880
                                     errors.ECODE_INVAL)
10881
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10882
          # otherwise generate the mac
10883
          nic_dict[constants.INIC_MAC] = \
10884
            self.cfg.GenerateMAC(self.proc.GetECId())
10885
        else:
10886
          # or validate/reserve the current one
10887
          try:
10888
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10889
          except errors.ReservationError:
10890
            raise errors.OpPrereqError("MAC address %s already in use"
10891
                                       " in cluster" % nic_mac,
10892
                                       errors.ECODE_NOTUNIQUE)
10893

    
10894
    # DISK processing
10895
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10896
      raise errors.OpPrereqError("Disk operations not supported for"
10897
                                 " diskless instances",
10898
                                 errors.ECODE_INVAL)
10899
    for disk_op, _ in self.op.disks:
10900
      if disk_op == constants.DDM_REMOVE:
10901
        if len(instance.disks) == 1:
10902
          raise errors.OpPrereqError("Cannot remove the last disk of"
10903
                                     " an instance", errors.ECODE_INVAL)
10904
        _CheckInstanceDown(self, instance, "cannot remove disks")
10905

    
10906
      if (disk_op == constants.DDM_ADD and
10907
          len(instance.disks) >= constants.MAX_DISKS):
10908
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10909
                                   " add more" % constants.MAX_DISKS,
10910
                                   errors.ECODE_STATE)
10911
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10912
        # an existing disk
10913
        if disk_op < 0 or disk_op >= len(instance.disks):
10914
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10915
                                     " are 0 to %d" %
10916
                                     (disk_op, len(instance.disks)),
10917
                                     errors.ECODE_INVAL)
10918

    
10919
    return
10920

    
10921
  def _ConvertPlainToDrbd(self, feedback_fn):
10922
    """Converts an instance from plain to drbd.
10923

10924
    """
10925
    feedback_fn("Converting template to drbd")
10926
    instance = self.instance
10927
    pnode = instance.primary_node
10928
    snode = self.op.remote_node
10929

    
10930
    # create a fake disk info for _GenerateDiskTemplate
10931
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10932
                  constants.IDISK_VG: d.logical_id[0]}
10933
                 for d in instance.disks]
10934
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10935
                                      instance.name, pnode, [snode],
10936
                                      disk_info, None, None, 0, feedback_fn)
10937
    info = _GetInstanceInfoText(instance)
10938
    feedback_fn("Creating aditional volumes...")
10939
    # first, create the missing data and meta devices
10940
    for disk in new_disks:
10941
      # unfortunately this is... not too nice
10942
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10943
                            info, True)
10944
      for child in disk.children:
10945
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10946
    # at this stage, all new LVs have been created, we can rename the
10947
    # old ones
10948
    feedback_fn("Renaming original volumes...")
10949
    rename_list = [(o, n.children[0].logical_id)
10950
                   for (o, n) in zip(instance.disks, new_disks)]
10951
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10952
    result.Raise("Failed to rename original LVs")
10953

    
10954
    feedback_fn("Initializing DRBD devices...")
10955
    # all child devices are in place, we can now create the DRBD devices
10956
    for disk in new_disks:
10957
      for node in [pnode, snode]:
10958
        f_create = node == pnode
10959
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10960

    
10961
    # at this point, the instance has been modified
10962
    instance.disk_template = constants.DT_DRBD8
10963
    instance.disks = new_disks
10964
    self.cfg.Update(instance, feedback_fn)
10965

    
10966
    # disks are created, waiting for sync
10967
    disk_abort = not _WaitForSync(self, instance,
10968
                                  oneshot=not self.op.wait_for_sync)
10969
    if disk_abort:
10970
      raise errors.OpExecError("There are some degraded disks for"
10971
                               " this instance, please cleanup manually")
10972

    
10973
  def _ConvertDrbdToPlain(self, feedback_fn):
10974
    """Converts an instance from drbd to plain.
10975

10976
    """
10977
    instance = self.instance
10978
    assert len(instance.secondary_nodes) == 1
10979
    pnode = instance.primary_node
10980
    snode = instance.secondary_nodes[0]
10981
    feedback_fn("Converting template to plain")
10982

    
10983
    old_disks = instance.disks
10984
    new_disks = [d.children[0] for d in old_disks]
10985

    
10986
    # copy over size and mode
10987
    for parent, child in zip(old_disks, new_disks):
10988
      child.size = parent.size
10989
      child.mode = parent.mode
10990

    
10991
    # update instance structure
10992
    instance.disks = new_disks
10993
    instance.disk_template = constants.DT_PLAIN
10994
    self.cfg.Update(instance, feedback_fn)
10995

    
10996
    feedback_fn("Removing volumes on the secondary node...")
10997
    for disk in old_disks:
10998
      self.cfg.SetDiskID(disk, snode)
10999
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11000
      if msg:
11001
        self.LogWarning("Could not remove block device %s on node %s,"
11002
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11003

    
11004
    feedback_fn("Removing unneeded volumes on the primary node...")
11005
    for idx, disk in enumerate(old_disks):
11006
      meta = disk.children[1]
11007
      self.cfg.SetDiskID(meta, pnode)
11008
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11009
      if msg:
11010
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11011
                        " continuing anyway: %s", idx, pnode, msg)
11012

    
11013
  def Exec(self, feedback_fn):
11014
    """Modifies an instance.
11015

11016
    All parameters take effect only at the next restart of the instance.
11017

11018
    """
11019
    # Process here the warnings from CheckPrereq, as we don't have a
11020
    # feedback_fn there.
11021
    for warn in self.warn:
11022
      feedback_fn("WARNING: %s" % warn)
11023

    
11024
    result = []
11025
    instance = self.instance
11026
    # disk changes
11027
    for disk_op, disk_dict in self.op.disks:
11028
      if disk_op == constants.DDM_REMOVE:
11029
        # remove the last disk
11030
        device = instance.disks.pop()
11031
        device_idx = len(instance.disks)
11032
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11033
          self.cfg.SetDiskID(disk, node)
11034
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11035
          if msg:
11036
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11037
                            " continuing anyway", device_idx, node, msg)
11038
        result.append(("disk/%d" % device_idx, "remove"))
11039
      elif disk_op == constants.DDM_ADD:
11040
        # add a new disk
11041
        if instance.disk_template in (constants.DT_FILE,
11042
                                        constants.DT_SHARED_FILE):
11043
          file_driver, file_path = instance.disks[0].logical_id
11044
          file_path = os.path.dirname(file_path)
11045
        else:
11046
          file_driver = file_path = None
11047
        disk_idx_base = len(instance.disks)
11048
        new_disk = _GenerateDiskTemplate(self,
11049
                                         instance.disk_template,
11050
                                         instance.name, instance.primary_node,
11051
                                         instance.secondary_nodes,
11052
                                         [disk_dict],
11053
                                         file_path,
11054
                                         file_driver,
11055
                                         disk_idx_base, feedback_fn)[0]
11056
        instance.disks.append(new_disk)
11057
        info = _GetInstanceInfoText(instance)
11058

    
11059
        logging.info("Creating volume %s for instance %s",
11060
                     new_disk.iv_name, instance.name)
11061
        # Note: this needs to be kept in sync with _CreateDisks
11062
        #HARDCODE
11063
        for node in instance.all_nodes:
11064
          f_create = node == instance.primary_node
11065
          try:
11066
            _CreateBlockDev(self, node, instance, new_disk,
11067
                            f_create, info, f_create)
11068
          except errors.OpExecError, err:
11069
            self.LogWarning("Failed to create volume %s (%s) on"
11070
                            " node %s: %s",
11071
                            new_disk.iv_name, new_disk, node, err)
11072
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11073
                       (new_disk.size, new_disk.mode)))
11074
      else:
11075
        # change a given disk
11076
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11077
        result.append(("disk.mode/%d" % disk_op,
11078
                       disk_dict[constants.IDISK_MODE]))
11079

    
11080
    if self.op.disk_template:
11081
      r_shut = _ShutdownInstanceDisks(self, instance)
11082
      if not r_shut:
11083
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11084
                                 " proceed with disk template conversion")
11085
      mode = (instance.disk_template, self.op.disk_template)
11086
      try:
11087
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11088
      except:
11089
        self.cfg.ReleaseDRBDMinors(instance.name)
11090
        raise
11091
      result.append(("disk_template", self.op.disk_template))
11092

    
11093
    # NIC changes
11094
    for nic_op, nic_dict in self.op.nics:
11095
      if nic_op == constants.DDM_REMOVE:
11096
        # remove the last nic
11097
        del instance.nics[-1]
11098
        result.append(("nic.%d" % len(instance.nics), "remove"))
11099
      elif nic_op == constants.DDM_ADD:
11100
        # mac and bridge should be set, by now
11101
        mac = nic_dict[constants.INIC_MAC]
11102
        ip = nic_dict.get(constants.INIC_IP, None)
11103
        nicparams = self.nic_pinst[constants.DDM_ADD]
11104
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11105
        instance.nics.append(new_nic)
11106
        result.append(("nic.%d" % (len(instance.nics) - 1),
11107
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11108
                       (new_nic.mac, new_nic.ip,
11109
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11110
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11111
                       )))
11112
      else:
11113
        for key in (constants.INIC_MAC, constants.INIC_IP):
11114
          if key in nic_dict:
11115
            setattr(instance.nics[nic_op], key, nic_dict[key])
11116
        if nic_op in self.nic_pinst:
11117
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11118
        for key, val in nic_dict.iteritems():
11119
          result.append(("nic.%s/%d" % (key, nic_op), val))
11120

    
11121
    # hvparams changes
11122
    if self.op.hvparams:
11123
      instance.hvparams = self.hv_inst
11124
      for key, val in self.op.hvparams.iteritems():
11125
        result.append(("hv/%s" % key, val))
11126

    
11127
    # beparams changes
11128
    if self.op.beparams:
11129
      instance.beparams = self.be_inst
11130
      for key, val in self.op.beparams.iteritems():
11131
        result.append(("be/%s" % key, val))
11132

    
11133
    # OS change
11134
    if self.op.os_name:
11135
      instance.os = self.op.os_name
11136

    
11137
    # osparams changes
11138
    if self.op.osparams:
11139
      instance.osparams = self.os_inst
11140
      for key, val in self.op.osparams.iteritems():
11141
        result.append(("os/%s" % key, val))
11142

    
11143
    self.cfg.Update(instance, feedback_fn)
11144

    
11145
    return result
11146

    
11147
  _DISK_CONVERSIONS = {
11148
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11149
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11150
    }
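

# The _DISK_CONVERSIONS table above maps (current template, requested
# template) pairs to the helper that performs the conversion; unsupported
# combinations simply have no key.  The function below is a minimal,
# standalone sketch of the same dispatch idiom; the template names and
# converter functions in it are invented for the example and are not used by
# any LU.
def _ExampleTemplateDispatch(old_template, new_template):
  """Illustrative sketch of the (old, new) -> converter lookup.

  """
  def _PlainToMirror():
    return "create mirror devices on top of the existing volumes"

  def _MirrorToPlain():
    return "keep only the data volumes and drop the mirror devices"

  conversions = {
    ("plain", "mirror"): _PlainToMirror,
    ("mirror", "plain"): _MirrorToPlain,
    }

  try:
    converter = conversions[(old_template, new_template)]
  except KeyError:
    raise ValueError("Unsupported conversion %s -> %s" %
                     (old_template, new_template))
  return converter()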


class LUInstanceChangeGroup(LogicalUnit):
11154
  HPATH = "instance-change-group"
11155
  HTYPE = constants.HTYPE_INSTANCE
11156
  REQ_BGL = False
11157

    
11158
  def ExpandNames(self):
11159
    self.share_locks = _ShareAll()
11160
    self.needed_locks = {
11161
      locking.LEVEL_NODEGROUP: [],
11162
      locking.LEVEL_NODE: [],
11163
      }
11164

    
11165
    self._ExpandAndLockInstance()
11166

    
11167
    if self.op.target_groups:
11168
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11169
                                  self.op.target_groups)
11170
    else:
11171
      self.req_target_uuids = None
11172

    
11173
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11174

    
11175
  def DeclareLocks(self, level):
11176
    if level == locking.LEVEL_NODEGROUP:
11177
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11178

    
11179
      if self.req_target_uuids:
11180
        lock_groups = set(self.req_target_uuids)
11181

    
11182
        # Lock all groups used by instance optimistically; this requires going
11183
        # via the node before it's locked, requiring verification later on
11184
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11185
        lock_groups.update(instance_groups)
11186
      else:
11187
        # No target groups, need to lock all of them
11188
        lock_groups = locking.ALL_SET
11189

    
11190
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11191

    
11192
    elif level == locking.LEVEL_NODE:
11193
      if self.req_target_uuids:
11194
        # Lock all nodes used by instances
11195
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11196
        self._LockInstancesNodes()
11197

    
11198
        # Lock all nodes in all potential target groups
11199
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11200
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11201
        member_nodes = [node_name
11202
                        for group in lock_groups
11203
                        for node_name in self.cfg.GetNodeGroup(group).members]
11204
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11205
      else:
11206
        # Lock all nodes as all groups are potential targets
11207
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11208

    
11209
  def CheckPrereq(self):
11210
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11211
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11212
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11213

    
11214
    assert (self.req_target_uuids is None or
11215
            owned_groups.issuperset(self.req_target_uuids))
11216
    assert owned_instances == set([self.op.instance_name])
11217

    
11218
    # Get instance information
11219
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11220

    
11221
    # Check if node groups for locked instance are still correct
11222
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11223
      ("Instance %s's nodes changed while we kept the lock" %
11224
       self.op.instance_name)
11225

    
11226
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11227
                                           owned_groups)
11228

    
11229
    if self.req_target_uuids:
11230
      # User requested specific target groups
11231
      self.target_uuids = self.req_target_uuids
11232
    else:
11233
      # All groups except those used by the instance are potential targets
11234
      self.target_uuids = owned_groups - inst_groups
11235

    
11236
    conflicting_groups = self.target_uuids & inst_groups
11237
    if conflicting_groups:
11238
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11239
                                 " used by the instance '%s'" %
11240
                                 (utils.CommaJoin(conflicting_groups),
11241
                                  self.op.instance_name),
11242
                                 errors.ECODE_INVAL)
11243

    
11244
    if not self.target_uuids:
11245
      raise errors.OpPrereqError("There are no possible target groups",
11246
                                 errors.ECODE_INVAL)
11247

    
11248
  def BuildHooksEnv(self):
11249
    """Build hooks env.
11250

11251
    """
11252
    assert self.target_uuids
11253

    
11254
    env = {
11255
      "TARGET_GROUPS": " ".join(self.target_uuids),
11256
      }
11257

    
11258
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11259

    
11260
    return env
11261

    
11262
  def BuildHooksNodes(self):
11263
    """Build hooks nodes.
11264

11265
    """
11266
    mn = self.cfg.GetMasterNode()
11267
    return ([mn], [mn])
11268

    
11269
  def Exec(self, feedback_fn):
11270
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11271

    
11272
    assert instances == [self.op.instance_name], "Instance not locked"
11273

    
11274
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11275
                     instances=instances, target_groups=list(self.target_uuids))
11276

    
11277
    ial.Run(self.op.iallocator)
11278

    
11279
    if not ial.success:
11280
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11281
                                 " instance '%s' using iallocator '%s': %s" %
11282
                                 (self.op.instance_name, self.op.iallocator,
11283
                                  ial.info),
11284
                                 errors.ECODE_NORES)
11285

    
11286
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11287

    
11288
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11289
                 " instance '%s'", len(jobs), self.op.instance_name)
11290

    
11291
    return ResultWithJobs(jobs)
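

# CheckPrereq above computes the candidate target groups either from the
# explicit request or as "every owned group minus the groups the instance
# currently spans", and refuses overlaps between the two sets.  The function
# below is a standalone sketch of just that set arithmetic; the group values
# passed to it are plain strings invented by the caller.
def _ExampleChangeGroupTargets(owned_groups, instance_groups,
                               requested_groups=None):
  """Illustrative sketch of the target-group selection in CheckPrereq.

  """
  if requested_groups:
    targets = set(requested_groups)
  else:
    targets = set(owned_groups) - set(instance_groups)

  conflicting = targets & set(instance_groups)
  if conflicting:
    raise ValueError("Group(s) %s are already used by the instance" %
                     ", ".join(sorted(conflicting)))
  if not targets:
    raise ValueError("There are no possible target groups")
  return targets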


class LUBackupQuery(NoHooksLU):
11295
  """Query the exports list
11296

11297
  """
11298
  REQ_BGL = False
11299

    
11300
  def ExpandNames(self):
11301
    self.needed_locks = {}
11302
    self.share_locks[locking.LEVEL_NODE] = 1
11303
    if not self.op.nodes:
11304
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11305
    else:
11306
      self.needed_locks[locking.LEVEL_NODE] = \
11307
        _GetWantedNodes(self, self.op.nodes)
11308

    
11309
  def Exec(self, feedback_fn):
11310
    """Compute the list of all the exported system images.
11311

11312
    @rtype: dict
11313
    @return: a dictionary with the structure node->(export-list)
11314
        where export-list is a list of the instances exported on
11315
        that node.
11316

11317
    """
11318
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11319
    rpcresult = self.rpc.call_export_list(self.nodes)
11320
    result = {}
11321
    for node in rpcresult:
11322
      if rpcresult[node].fail_msg:
11323
        result[node] = False
11324
      else:
11325
        result[node] = rpcresult[node].payload
11326

    
11327
    return result
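

# Exec above returns a mapping of node name to either False (the export-list
# RPC failed for that node) or the list of exports found on it.  The function
# below rebuilds that shape from already collected (error, payload) pairs; it
# performs no RPC and the node data it expects is invented by the caller.
def _ExampleCollectExportLists(raw_results):
  """Illustrative sketch of the LUBackupQuery result shape.

  @param raw_results: dict of node -> (error message or None, export list)

  """
  result = {}
  for node, (errmsg, payload) in raw_results.items():
    if errmsg:
      result[node] = False
    else:
      result[node] = payload
  return result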


class LUBackupPrepare(NoHooksLU):
11331
  """Prepares an instance for an export and returns useful information.
11332

11333
  """
11334
  REQ_BGL = False
11335

    
11336
  def ExpandNames(self):
11337
    self._ExpandAndLockInstance()
11338

    
11339
  def CheckPrereq(self):
11340
    """Check prerequisites.
11341

11342
    """
11343
    instance_name = self.op.instance_name
11344

    
11345
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11346
    assert self.instance is not None, \
11347
          "Cannot retrieve locked instance %s" % self.op.instance_name
11348
    _CheckNodeOnline(self, self.instance.primary_node)
11349

    
11350
    self._cds = _GetClusterDomainSecret()
11351

    
11352
  def Exec(self, feedback_fn):
11353
    """Prepares an instance for an export.
11354

11355
    """
11356
    instance = self.instance
11357

    
11358
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11359
      salt = utils.GenerateSecret(8)
11360

    
11361
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11362
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11363
                                              constants.RIE_CERT_VALIDITY)
11364
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11365

    
11366
      (name, cert_pem) = result.payload
11367

    
11368
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11369
                                             cert_pem)
11370

    
11371
      return {
11372
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11373
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11374
                          salt),
11375
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11376
        }
11377

    
11378
    return None
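

# For remote exports, the handshake built above ships the X509 key name
# together with an HMAC computed from the cluster domain secret and a random
# salt, which the receiving side verifies before trusting the key name.  The
# two functions below sketch that salted sign/verify idea with the standard
# library only; they are a hypothetical stand-in for illustration and do not
# reproduce the exact utils.Sha1Hmac/utils.VerifySha1Hmac format.
def _ExampleSignKeyName(secret, key_name, salt):
  """Illustrative salted HMAC over a key name (stand-in only).

  """
  import hashlib
  import hmac
  return hmac.new(secret, salt + key_name, hashlib.sha1).hexdigest()


def _ExampleVerifyKeyName(secret, key_name, digest, salt):
  """Checks a digest produced by L{_ExampleSignKeyName}.

  """
  return _ExampleSignKeyName(secret, key_name, salt) == digest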


class LUBackupExport(LogicalUnit):
11382
  """Export an instance to an image in the cluster.
11383

11384
  """
11385
  HPATH = "instance-export"
11386
  HTYPE = constants.HTYPE_INSTANCE
11387
  REQ_BGL = False
11388

    
11389
  def CheckArguments(self):
11390
    """Check the arguments.
11391

11392
    """
11393
    self.x509_key_name = self.op.x509_key_name
11394
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11395

    
11396
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11397
      if not self.x509_key_name:
11398
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11399
                                   errors.ECODE_INVAL)
11400

    
11401
      if not self.dest_x509_ca_pem:
11402
        raise errors.OpPrereqError("Missing destination X509 CA",
11403
                                   errors.ECODE_INVAL)
11404

    
11405
  def ExpandNames(self):
11406
    self._ExpandAndLockInstance()
11407

    
11408
    # Lock all nodes for local exports
11409
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11410
      # FIXME: lock only instance primary and destination node
11411
      #
11412
      # Sad but true, for now we have to lock all nodes, as we don't know where
11413
      # the previous export might be, and in this LU we search for it and
11414
      # remove it from its current node. In the future we could fix this by:
11415
      #  - making a tasklet to search (share-lock all), then create the
11416
      #    new one, then one to remove, after
11417
      #  - removing the removal operation altogether
11418
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11419

    
11420
  def DeclareLocks(self, level):
11421
    """Last minute lock declaration."""
11422
    # All nodes are locked anyway, so nothing to do here.
11423

    
11424
  def BuildHooksEnv(self):
11425
    """Build hooks env.
11426

11427
    This will run on the master, primary node and target node.
11428

11429
    """
11430
    env = {
11431
      "EXPORT_MODE": self.op.mode,
11432
      "EXPORT_NODE": self.op.target_node,
11433
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11434
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11435
      # TODO: Generic function for boolean env variables
11436
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11437
      }
11438

    
11439
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11440

    
11441
    return env
11442

    
11443
  def BuildHooksNodes(self):
11444
    """Build hooks nodes.
11445

11446
    """
11447
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11448

    
11449
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11450
      nl.append(self.op.target_node)
11451

    
11452
    return (nl, nl)
11453

    
11454
  def CheckPrereq(self):
11455
    """Check prerequisites.
11456

11457
    This checks that the instance and node names are valid.
11458

11459
    """
11460
    instance_name = self.op.instance_name
11461

    
11462
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11463
    assert self.instance is not None, \
11464
          "Cannot retrieve locked instance %s" % self.op.instance_name
11465
    _CheckNodeOnline(self, self.instance.primary_node)
11466

    
11467
    if (self.op.remove_instance and self.instance.admin_up and
11468
        not self.op.shutdown):
11469
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11470
                                 " down before")
11471

    
11472
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11473
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11474
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11475
      assert self.dst_node is not None
11476

    
11477
      _CheckNodeOnline(self, self.dst_node.name)
11478
      _CheckNodeNotDrained(self, self.dst_node.name)
11479

    
11480
      self._cds = None
11481
      self.dest_disk_info = None
11482
      self.dest_x509_ca = None
11483

    
11484
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11485
      self.dst_node = None
11486

    
11487
      if len(self.op.target_node) != len(self.instance.disks):
11488
        raise errors.OpPrereqError(("Received destination information for %s"
11489
                                    " disks, but instance %s has %s disks") %
11490
                                   (len(self.op.target_node), instance_name,
11491
                                    len(self.instance.disks)),
11492
                                   errors.ECODE_INVAL)
11493

    
11494
      cds = _GetClusterDomainSecret()
11495

    
11496
      # Check X509 key name
11497
      try:
11498
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11499
      except (TypeError, ValueError), err:
11500
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11501

    
11502
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11503
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11504
                                   errors.ECODE_INVAL)
11505

    
11506
      # Load and verify CA
11507
      try:
11508
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11509
      except OpenSSL.crypto.Error, err:
11510
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11511
                                   (err, ), errors.ECODE_INVAL)
11512

    
11513
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11514
      if errcode is not None:
11515
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11516
                                   (msg, ), errors.ECODE_INVAL)
11517

    
11518
      self.dest_x509_ca = cert
11519

    
11520
      # Verify target information
11521
      disk_info = []
11522
      for idx, disk_data in enumerate(self.op.target_node):
11523
        try:
11524
          (host, port, magic) = \
11525
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11526
        except errors.GenericError, err:
11527
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11528
                                     (idx, err), errors.ECODE_INVAL)
11529

    
11530
        disk_info.append((host, port, magic))
11531

    
11532
      assert len(disk_info) == len(self.op.target_node)
11533
      self.dest_disk_info = disk_info
11534

    
11535
    else:
11536
      raise errors.ProgrammerError("Unhandled export mode %r" %
11537
                                   self.op.mode)
11538

    
11539
    # instance disk type verification
11540
    # TODO: Implement export support for file-based disks
11541
    for disk in self.instance.disks:
11542
      if disk.dev_type == constants.LD_FILE:
11543
        raise errors.OpPrereqError("Export not supported for instances with"
11544
                                   " file-based disks", errors.ECODE_INVAL)
11545

    
11546
  def _CleanupExports(self, feedback_fn):
11547
    """Removes exports of current instance from all other nodes.
11548

11549
    If an instance in a cluster with nodes A..D was exported to node C, its
11550
    exports will be removed from the nodes A, B and D.
11551

11552
    """
11553
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11554

    
11555
    nodelist = self.cfg.GetNodeList()
11556
    nodelist.remove(self.dst_node.name)
11557

    
11558
    # on one-node clusters nodelist will be empty after the removal
11559
    # if we proceed the backup would be removed because OpBackupQuery
11560
    # substitutes an empty list with the full cluster node list.
11561
    iname = self.instance.name
11562
    if nodelist:
11563
      feedback_fn("Removing old exports for instance %s" % iname)
11564
      exportlist = self.rpc.call_export_list(nodelist)
11565
      for node in exportlist:
11566
        if exportlist[node].fail_msg:
11567
          continue
11568
        if iname in exportlist[node].payload:
11569
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11570
          if msg:
11571
            self.LogWarning("Could not remove older export for instance %s"
11572
                            " on node %s: %s", iname, node, msg)
11573

    
11574
  def Exec(self, feedback_fn):
11575
    """Export an instance to an image in the cluster.
11576

11577
    """
11578
    assert self.op.mode in constants.EXPORT_MODES
11579

    
11580
    instance = self.instance
11581
    src_node = instance.primary_node
11582

    
11583
    if self.op.shutdown:
11584
      # shutdown the instance, but not the disks
11585
      feedback_fn("Shutting down instance %s" % instance.name)
11586
      result = self.rpc.call_instance_shutdown(src_node, instance,
11587
                                               self.op.shutdown_timeout)
11588
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11589
      result.Raise("Could not shutdown instance %s on"
11590
                   " node %s" % (instance.name, src_node))
11591

    
11592
    # set the disks ID correctly since call_instance_start needs the
11593
    # correct drbd minor to create the symlinks
11594
    for disk in instance.disks:
11595
      self.cfg.SetDiskID(disk, src_node)
11596

    
11597
    activate_disks = (not instance.admin_up)
11598

    
11599
    if activate_disks:
11600
      # Activate the instance disks if we're exporting a stopped instance
11601
      feedback_fn("Activating disks for %s" % instance.name)
11602
      _StartInstanceDisks(self, instance, None)
11603

    
11604
    try:
11605
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11606
                                                     instance)
11607

    
11608
      helper.CreateSnapshots()
11609
      try:
11610
        if (self.op.shutdown and instance.admin_up and
11611
            not self.op.remove_instance):
11612
          assert not activate_disks
11613
          feedback_fn("Starting instance %s" % instance.name)
11614
          result = self.rpc.call_instance_start(src_node, instance,
11615
                                                None, None, False)
11616
          msg = result.fail_msg
11617
          if msg:
11618
            feedback_fn("Failed to start instance: %s" % msg)
11619
            _ShutdownInstanceDisks(self, instance)
11620
            raise errors.OpExecError("Could not start instance: %s" % msg)
11621

    
11622
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11623
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11624
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11625
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11626
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11627

    
11628
          (key_name, _, _) = self.x509_key_name
11629

    
11630
          dest_ca_pem = \
11631
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11632
                                            self.dest_x509_ca)
11633

    
11634
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11635
                                                     key_name, dest_ca_pem,
11636
                                                     timeouts)
11637
      finally:
11638
        helper.Cleanup()
11639

    
11640
      # Check for backwards compatibility
11641
      assert len(dresults) == len(instance.disks)
11642
      assert compat.all(isinstance(i, bool) for i in dresults), \
11643
             "Not all results are boolean: %r" % dresults
11644

    
11645
    finally:
11646
      if activate_disks:
11647
        feedback_fn("Deactivating disks for %s" % instance.name)
11648
        _ShutdownInstanceDisks(self, instance)
11649

    
11650
    if not (compat.all(dresults) and fin_resu):
11651
      failures = []
11652
      if not fin_resu:
11653
        failures.append("export finalization")
11654
      if not compat.all(dresults):
11655
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11656
                               if not dsk)
11657
        failures.append("disk export: disk(s) %s" % fdsk)
11658

    
11659
      raise errors.OpExecError("Export failed, errors in %s" %
11660
                               utils.CommaJoin(failures))
11661

    
11662
    # At this point, the export was successful, we can cleanup/finish
11663

    
11664
    # Remove instance if requested
11665
    if self.op.remove_instance:
11666
      feedback_fn("Removing instance %s" % instance.name)
11667
      _RemoveInstance(self, feedback_fn, instance,
11668
                      self.op.ignore_remove_failures)
11669

    
11670
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11671
      self._CleanupExports(feedback_fn)
11672

    
11673
    return fin_resu, dresults
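

# The end of Exec above folds the finalization status and the per-disk
# results into one pass/fail decision and, on failure, an error message
# naming exactly what went wrong.  The function below isolates that
# aggregation; it works on plain booleans and touches no cluster state.
def _ExampleSummarizeExport(fin_resu, dresults):
  """Illustrative sketch of the export result aggregation.

  @param fin_resu: whether export finalization succeeded
  @param dresults: list of per-disk booleans, True meaning success

  """
  if fin_resu and compat.all(dresults):
    return "export OK"
  failures = []
  if not fin_resu:
    failures.append("export finalization")
  if not compat.all(dresults):
    failed = [str(idx) for (idx, ok) in enumerate(dresults) if not ok]
    failures.append("disk export: disk(s) %s" % ", ".join(failed))
  raise RuntimeError("Export failed, errors in %s" % "; ".join(failures))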


class LUBackupRemove(NoHooksLU):
11677
  """Remove exports related to the named instance.
11678

11679
  """
11680
  REQ_BGL = False
11681

    
11682
  def ExpandNames(self):
11683
    self.needed_locks = {}
11684
    # We need all nodes to be locked in order for RemoveExport to work, but we
11685
    # don't need to lock the instance itself, as nothing will happen to it (and
11686
    # we can also remove exports for a removed instance)
11687
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11688

    
11689
  def Exec(self, feedback_fn):
11690
    """Remove any export.
11691

11692
    """
11693
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11694
    # If the instance was not found we'll try with the name that was passed in.
11695
    # This will only work if it was an FQDN, though.
11696
    fqdn_warn = False
11697
    if not instance_name:
11698
      fqdn_warn = True
11699
      instance_name = self.op.instance_name
11700

    
11701
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11702
    exportlist = self.rpc.call_export_list(locked_nodes)
11703
    found = False
11704
    for node in exportlist:
11705
      msg = exportlist[node].fail_msg
11706
      if msg:
11707
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11708
        continue
11709
      if instance_name in exportlist[node].payload:
11710
        found = True
11711
        result = self.rpc.call_export_remove(node, instance_name)
11712
        msg = result.fail_msg
11713
        if msg:
11714
          logging.error("Could not remove export for instance %s"
11715
                        " on node %s: %s", instance_name, node, msg)
11716

    
11717
    if fqdn_warn and not found:
11718
      feedback_fn("Export not found. If trying to remove an export belonging"
11719
                  " to a deleted instance please use its Fully Qualified"
11720
                  " Domain Name.")


class LUGroupAdd(LogicalUnit):
11724
  """Logical unit for creating node groups.
11725

11726
  """
11727
  HPATH = "group-add"
11728
  HTYPE = constants.HTYPE_GROUP
11729
  REQ_BGL = False
11730

    
11731
  def ExpandNames(self):
11732
    # We need the new group's UUID here so that we can create and acquire the
11733
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11734
    # that it should not check whether the UUID exists in the configuration.
11735
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11736
    self.needed_locks = {}
11737
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11738

    
11739
  def CheckPrereq(self):
11740
    """Check prerequisites.
11741

11742
    This checks that the given group name does not already exist as a node
    group.
11744

11745
    """
11746
    try:
11747
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11748
    except errors.OpPrereqError:
11749
      pass
11750
    else:
11751
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11752
                                 " node group (UUID: %s)" %
11753
                                 (self.op.group_name, existing_uuid),
11754
                                 errors.ECODE_EXISTS)
11755

    
11756
    if self.op.ndparams:
11757
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11758

    
11759
  def BuildHooksEnv(self):
11760
    """Build hooks env.
11761

11762
    """
11763
    return {
11764
      "GROUP_NAME": self.op.group_name,
11765
      }
11766

    
11767
  def BuildHooksNodes(self):
11768
    """Build hooks nodes.
11769

11770
    """
11771
    mn = self.cfg.GetMasterNode()
11772
    return ([mn], [mn])
11773

    
11774
  def Exec(self, feedback_fn):
11775
    """Add the node group to the cluster.
11776

11777
    """
11778
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11779
                                  uuid=self.group_uuid,
11780
                                  alloc_policy=self.op.alloc_policy,
11781
                                  ndparams=self.op.ndparams)
11782

    
11783
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11784
    del self.remove_locks[locking.LEVEL_NODEGROUP]
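

# CheckPrereq above uses a try/except/else block in which *success* of the
# name lookup is the error case: the new group name must not resolve to an
# existing group.  The function below shows the same idiom in isolation; the
# lookup callable is an invented stand-in for cfg.LookupNodeGroup and raises
# KeyError instead of errors.OpPrereqError.
def _ExampleEnsureNameIsFree(name, lookup_fn):
  """Illustrative sketch of the "lookup must fail" check in LUGroupAdd.

  """
  try:
    existing = lookup_fn(name)
  except KeyError:
    # Good: the name is unknown, so it is free to use
    return
  raise ValueError("Name '%s' is already taken (by %s)" % (name, existing))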


class LUGroupAssignNodes(NoHooksLU):
11788
  """Logical unit for assigning nodes to groups.
11789

11790
  """
11791
  REQ_BGL = False
11792

    
11793
  def ExpandNames(self):
11794
    # These raise errors.OpPrereqError on their own:
11795
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11796
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11797

    
11798
    # We want to lock all the affected nodes and groups. We have readily
11799
    # available the list of nodes, and the *destination* group. To gather the
11800
    # list of "source" groups, we need to fetch node information later on.
11801
    self.needed_locks = {
11802
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11803
      locking.LEVEL_NODE: self.op.nodes,
11804
      }
11805

    
11806
  def DeclareLocks(self, level):
11807
    if level == locking.LEVEL_NODEGROUP:
11808
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11809

    
11810
      # Try to get all affected nodes' groups without having the group or node
11811
      # lock yet. Needs verification later in the code flow.
11812
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11813

    
11814
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11815

    
11816
  def CheckPrereq(self):
11817
    """Check prerequisites.
11818

11819
    """
11820
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11821
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11822
            frozenset(self.op.nodes))
11823

    
11824
    expected_locks = (set([self.group_uuid]) |
11825
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11826
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11827
    if actual_locks != expected_locks:
11828
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11829
                               " current groups are '%s', used to be '%s'" %
11830
                               (utils.CommaJoin(expected_locks),
11831
                                utils.CommaJoin(actual_locks)))
11832

    
11833
    self.node_data = self.cfg.GetAllNodesInfo()
11834
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11835
    instance_data = self.cfg.GetAllInstancesInfo()
11836

    
11837
    if self.group is None:
11838
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11839
                               (self.op.group_name, self.group_uuid))
11840

    
11841
    (new_splits, previous_splits) = \
11842
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11843
                                             for node in self.op.nodes],
11844
                                            self.node_data, instance_data)
11845

    
11846
    if new_splits:
11847
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11848

    
11849
      if not self.op.force:
11850
        raise errors.OpExecError("The following instances get split by this"
11851
                                 " change and --force was not given: %s" %
11852
                                 fmt_new_splits)
11853
      else:
11854
        self.LogWarning("This operation will split the following instances: %s",
11855
                        fmt_new_splits)
11856

    
11857
        if previous_splits:
11858
          self.LogWarning("In addition, these already-split instances continue"
11859
                          " to be split across groups: %s",
11860
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11861

    
11862
  def Exec(self, feedback_fn):
11863
    """Assign nodes to a new group.
11864

11865
    """
11866
    for node in self.op.nodes:
11867
      self.node_data[node].group = self.group_uuid
11868

    
11869
    # FIXME: Depends on side-effects of modifying the result of
11870
    # C{cfg.GetAllNodesInfo}
11871

    
11872
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11873

    
11874
  @staticmethod
11875
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11876
    """Check for split instances after a node assignment.
11877

11878
    This method considers a series of node assignments as an atomic operation,
11879
    and returns information about split instances after applying the set of
11880
    changes.
11881

11882
    In particular, it returns information about newly split instances, and
11883
    instances that were already split, and remain so after the change.
11884

11885
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11886
    considered.
11887

11888
    @type changes: list of (node_name, new_group_uuid) pairs.
11889
    @param changes: list of node assignments to consider.
11890
    @param node_data: a dict with data for all nodes
11891
    @param instance_data: a dict with all instances to consider
11892
    @rtype: a two-tuple
11893
    @return: a list of instances that were previously healthy and become
      split as a consequence of this change, and a list of instances that
      were already split and that this change does not fix.
11896

11897
    """
11898
    changed_nodes = dict((node, group) for node, group in changes
11899
                         if node_data[node].group != group)
11900

    
11901
    all_split_instances = set()
11902
    previously_split_instances = set()
11903

    
11904
    def InstanceNodes(instance):
11905
      return [instance.primary_node] + list(instance.secondary_nodes)
11906

    
11907
    for inst in instance_data.values():
11908
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11909
        continue
11910

    
11911
      instance_nodes = InstanceNodes(inst)
11912

    
11913
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11914
        previously_split_instances.add(inst.name)
11915

    
11916
      if len(set(changed_nodes.get(node, node_data[node].group)
11917
                 for node in instance_nodes)) > 1:
11918
        all_split_instances.add(inst.name)
11919

    
11920
    return (list(all_split_instances - previously_split_instances),
11921
            list(previously_split_instances & all_split_instances))
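

# CheckAssignmentForSplitInstances above only reads plain attributes from its
# arguments, so it can be exercised with small stand-in objects.  The rough
# usage sketch below moves one node of a DRBD instance into another group and
# gets the instance back as "newly split"; every name in it is invented and
# the stub classes carry only the attributes the check actually reads.
def _ExampleSplitCheckUsage():
  """Illustrative usage sketch for CheckAssignmentForSplitInstances.

  """
  class _FakeNode:
    def __init__(self, group):
      self.group = group

  class _FakeInstance:
    def __init__(self, name, primary, secondaries):
      self.name = name
      self.primary_node = primary
      self.secondary_nodes = secondaries
      self.disk_template = constants.DT_DRBD8

  node_data = {"node1": _FakeNode("uuid-a"), "node2": _FakeNode("uuid-a")}
  instance_data = {"inst1": _FakeInstance("inst1", "node1", ["node2"])}

  # Moving node2 into another group splits inst1 across two groups, so this
  # returns (["inst1"], [])
  return LUGroupAssignNodes.CheckAssignmentForSplitInstances(
    [("node2", "uuid-b")], node_data, instance_data)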


class _GroupQuery(_QueryBase):
11925
  FIELDS = query.GROUP_FIELDS
11926

    
11927
  def ExpandNames(self, lu):
11928
    lu.needed_locks = {}
11929

    
11930
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11931
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11932

    
11933
    if not self.names:
11934
      self.wanted = [name_to_uuid[name]
11935
                     for name in utils.NiceSort(name_to_uuid.keys())]
11936
    else:
11937
      # Accept names to be either names or UUIDs.
11938
      missing = []
11939
      self.wanted = []
11940
      all_uuid = frozenset(self._all_groups.keys())
11941

    
11942
      for name in self.names:
11943
        if name in all_uuid:
11944
          self.wanted.append(name)
11945
        elif name in name_to_uuid:
11946
          self.wanted.append(name_to_uuid[name])
11947
        else:
11948
          missing.append(name)
11949

    
11950
      if missing:
11951
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11952
                                   utils.CommaJoin(missing),
11953
                                   errors.ECODE_NOENT)
11954

    
11955
  def DeclareLocks(self, lu, level):
11956
    pass
11957

    
11958
  def _GetQueryData(self, lu):
11959
    """Computes the list of node groups and their attributes.
11960

11961
    """
11962
    do_nodes = query.GQ_NODE in self.requested_data
11963
    do_instances = query.GQ_INST in self.requested_data
11964

    
11965
    group_to_nodes = None
11966
    group_to_instances = None
11967

    
11968
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11969
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11970
    # latter GetAllInstancesInfo() is not enough, for we have to go through
11971
    # instance->node. Hence, we will need to process nodes even if we only need
11972
    # instance information.
11973
    if do_nodes or do_instances:
11974
      all_nodes = lu.cfg.GetAllNodesInfo()
11975
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11976
      node_to_group = {}
11977

    
11978
      for node in all_nodes.values():
11979
        if node.group in group_to_nodes:
11980
          group_to_nodes[node.group].append(node.name)
11981
          node_to_group[node.name] = node.group
11982

    
11983
      if do_instances:
11984
        all_instances = lu.cfg.GetAllInstancesInfo()
11985
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
11986

    
11987
        for instance in all_instances.values():
11988
          node = instance.primary_node
11989
          if node in node_to_group:
11990
            group_to_instances[node_to_group[node]].append(instance.name)
11991

    
11992
        if not do_nodes:
11993
          # Do not pass on node information if it was not requested.
11994
          group_to_nodes = None
11995

    
11996
    return query.GroupQueryData([self._all_groups[uuid]
11997
                                 for uuid in self.wanted],
11998
                                group_to_nodes, group_to_instances)
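

# _GetQueryData above needs group->nodes and group->instances mappings, but
# the configuration stores the relation the other way around (each node knows
# its group, each instance knows its primary node).  The function below
# inverts those relations with plain dictionaries; the data it expects is
# invented by the caller and no configuration access is involved.
def _ExampleGroupMaps(node_to_group, instance_to_pnode, wanted_groups):
  """Illustrative sketch of the inversion done in _GroupQuery._GetQueryData.

  """
  group_to_nodes = dict((uuid, []) for uuid in wanted_groups)
  for (node, group) in node_to_group.items():
    if group in group_to_nodes:
      group_to_nodes[group].append(node)

  group_to_instances = dict((uuid, []) for uuid in wanted_groups)
  for (instance, pnode) in instance_to_pnode.items():
    group = node_to_group.get(pnode)
    if group in group_to_instances:
      group_to_instances[group].append(instance)

  return (group_to_nodes, group_to_instances)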


class LUGroupQuery(NoHooksLU):
12002
  """Logical unit for querying node groups.
12003

12004
  """
12005
  REQ_BGL = False
12006

    
12007
  def CheckArguments(self):
12008
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12009
                          self.op.output_fields, False)
12010

    
12011
  def ExpandNames(self):
12012
    self.gq.ExpandNames(self)
12013

    
12014
  def Exec(self, feedback_fn):
12015
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
12019
  """Modifies the parameters of a node group.
12020

12021
  """
12022
  HPATH = "group-modify"
12023
  HTYPE = constants.HTYPE_GROUP
12024
  REQ_BGL = False
12025

    
12026
  def CheckArguments(self):
12027
    all_changes = [
12028
      self.op.ndparams,
12029
      self.op.alloc_policy,
12030
      ]
12031

    
12032
    if all_changes.count(None) == len(all_changes):
12033
      raise errors.OpPrereqError("Please pass at least one modification",
12034
                                 errors.ECODE_INVAL)
12035

    
12036
  def ExpandNames(self):
12037
    # This raises errors.OpPrereqError on its own:
12038
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12039

    
12040
    self.needed_locks = {
12041
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12042
      }
12043

    
12044
  def CheckPrereq(self):
12045
    """Check prerequisites.
12046

12047
    """
12048
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12049

    
12050
    if self.group is None:
12051
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12052
                               (self.op.group_name, self.group_uuid))
12053

    
12054
    if self.op.ndparams:
12055
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12056
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12057
      self.new_ndparams = new_ndparams
12058

    
12059
  def BuildHooksEnv(self):
12060
    """Build hooks env.
12061

12062
    """
12063
    return {
12064
      "GROUP_NAME": self.op.group_name,
12065
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12066
      }
12067

    
12068
  def BuildHooksNodes(self):
12069
    """Build hooks nodes.
12070

12071
    """
12072
    mn = self.cfg.GetMasterNode()
12073
    return ([mn], [mn])
12074

    
12075
  def Exec(self, feedback_fn):
12076
    """Modifies the node group.
12077

12078
    """
12079
    result = []
12080

    
12081
    if self.op.ndparams:
12082
      self.group.ndparams = self.new_ndparams
12083
      result.append(("ndparams", str(self.group.ndparams)))
12084

    
12085
    if self.op.alloc_policy:
12086
      self.group.alloc_policy = self.op.alloc_policy
12087

    
12088
    self.cfg.Update(self.group, feedback_fn)
12089
    return result
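

# Exec above applies the ndparams computed in CheckPrereq and reports every
# modification as a (parameter, new value) pair.  The function below is a
# deliberately simplified merge used only for illustration; it does not
# reproduce the full semantics of _GetUpdatedParams (for example, resetting a
# value back to its default).
def _ExampleMergeGroupParams(old_params, overrides):
  """Illustrative, simplified parameter merge and change report.

  """
  merged = dict(old_params)
  merged.update(overrides)
  # Report the modifications the way the LUs do: a list of (name, value)
  # pairs that ends up in the job result
  changes = [(name, value) for (name, value) in sorted(overrides.items())]
  return (merged, changes)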


class LUGroupRemove(LogicalUnit):
12094
  HPATH = "group-remove"
12095
  HTYPE = constants.HTYPE_GROUP
12096
  REQ_BGL = False
12097

    
12098
  def ExpandNames(self):
12099
    # This raises errors.OpPrereqError on its own:
12100
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12101
    self.needed_locks = {
12102
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12103
      }
12104

    
12105
  def CheckPrereq(self):
12106
    """Check prerequisites.
12107

12108
    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.
12111

12112
    """
12113
    # Verify that the group is empty.
12114
    group_nodes = [node.name
12115
                   for node in self.cfg.GetAllNodesInfo().values()
12116
                   if node.group == self.group_uuid]
12117

    
12118
    if group_nodes:
12119
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
12120
                                 " nodes: %s" %
12121
                                 (self.op.group_name,
12122
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
12123
                                 errors.ECODE_STATE)
12124

    
12125
    # Verify the cluster would not be left group-less.
12126
    if len(self.cfg.GetNodeGroupList()) == 1:
12127
      raise errors.OpPrereqError("Group '%s' is the only group,"
12128
                                 " cannot be removed" %
12129
                                 self.op.group_name,
12130
                                 errors.ECODE_STATE)
12131

    
12132
  def BuildHooksEnv(self):
12133
    """Build hooks env.
12134

12135
    """
12136
    return {
12137
      "GROUP_NAME": self.op.group_name,
12138
      }
12139

    
12140
  def BuildHooksNodes(self):
12141
    """Build hooks nodes.
12142

12143
    """
12144
    mn = self.cfg.GetMasterNode()
12145
    return ([mn], [mn])
12146

    
12147
  def Exec(self, feedback_fn):
12148
    """Remove the node group.
12149

12150
    """
12151
    try:
12152
      self.cfg.RemoveNodeGroup(self.group_uuid)
12153
    except errors.ConfigurationError:
12154
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12155
                               (self.op.group_name, self.group_uuid))
12156

    
12157
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12158

    
12159

    
12160
class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


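# Example (illustrative only): the IAllocator request built for the group
# evacuation above (mode constants.IALLOCATOR_MODE_CHG_GROUP, see
# IAllocator._AddChangeGroup further down) is essentially
#   {"type": <mode>, "instances": [...], "target_groups": [...]}
# and its validated result has the (moved, failed, jobs) shape checked by
# IAllocator._NEVAC_RESULT, which _LoadNodeEvacResult then turns into jobs.
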
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the (path, tag) pairs matching the search pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


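# Example (illustrative only, hypothetical names): a search for the pattern
# "^db" could return pairs such as
#   [("/cluster", "dbfarm"), ("/instances/inst1.example.com", "db-master")]
# i.e. LUTagsSearch.Exec reports the path of the tagged object together with
# each matching tag.
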
class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tags.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


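# Note: with repeat=0 the delay above is executed exactly once and no
# iteration counter is logged; with repeat=3 it is executed three times and
# the log shows the zero-based iterations 0/2, 1/2 and 2/2.
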
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


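# Sketch (assumption, not an official client API): the notification protocol
# used by LUTestJqueue above is: the LU binds a Unix socket, publishes its
# path via a job-queue log entry of type constants.ELOG_JQUEUE_TEST, waits up
# to _CLIENT_CONNECT_TIMEOUT for the test client to connect and then up to
# _CLIENT_CONFIRM_TIMEOUT for a confirmation.  A minimal client could do:
#
#   import socket
#   s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
#   s.connect(sockname)  # path taken from the ELOG_JQUEUE_TEST log entry
#   s.send("x")          # one byte (or a plain close) confirms the message
#   s.close()
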
class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has several sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in_data, in_text, out_data, out_text), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

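  # Note: after _ComputeClusterData the in_data dict carries the top-level
  # keys "version", "cluster_name", "cluster_tags", "enabled_hypervisors",
  # "nodegroups", "nodes" and "instances"; _BuildInputData later adds the
  # mode-specific "request" key before serializing everything into in_text.
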
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node", errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for change-group requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
    constants.IALLOCATOR_MODE_MEVAC:
      (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
       ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
    constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
    constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

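  # Example (illustrative sketch, hypothetical values): _MODE_DATA maps each
  # mode to (request builder, required keyword arguments with their type
  # checks, result check), so an allocation request is driven roughly as
  # LUTestAllocator.Exec does below:
  #
  #   ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_ALLOC,
  #                    name="inst1.example.com", memory=512, disks=[...],
  #                    disk_template=..., os=..., tags=[], nics=[...],
  #                    vcpus=1, hypervisor=...)
  #   ial.Run("hail")  # name of the external iallocator script to execute
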
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode in (constants.IALLOCATOR_MODE_RELOC,
                     constants.IALLOCATOR_MODE_MEVAC):
      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      if self.mode == constants.IALLOCATOR_MODE_RELOC:
        assert self.relocate_from is not None
        assert self.required_nodes == 1

        request_groups = fn(self.relocate_from)
        result_groups = fn(rdict["result"])

        if result_groups != request_groups:
          raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                   " differ from original groups (%s)" %
                                   (utils.CommaJoin(result_groups),
                                    utils.CommaJoin(request_groups)))
      elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
        request_groups = fn(self.evac_nodes)
        for (instance_name, secnode) in self.result:
          result_groups = fn([secnode])
          if result_groups != request_groups:
            raise errors.OpExecError("Iallocator returned new secondary node"
                                     " '%s' (group '%s') for instance '%s'"
                                     " which is not in original group '%s'" %
                                     (secnode, utils.CommaJoin(result_groups),
                                      instance_name,
                                      utils.CommaJoin(request_groups)))
      else:
        raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

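  # Note: a successful reply from the external script deserializes into a
  # dict shaped like {"success": ..., "info": ..., "result": ...} (with
  # "nodes" still accepted as a legacy alias for "result", as handled above);
  # the payload must additionally satisfy the per-mode check taken from
  # _MODE_DATA, e.g. _NEVAC_RESULT for the change-group and node-evacuate
  # modes.
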
  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)


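# Example (illustrative only, hypothetical names): _NodesToGroups is a pure
# helper; with node2group == {"node1": "uuid-a", "node2": "uuid-b"} and
# groups == {"uuid-a": {"name": "default"}}, the node list
# ["node1", "node2", "node3"] maps to ["default", "uuid-b"]: unknown nodes
# are skipped and groups without metadata fall back to their UUID.
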
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
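# Example (illustrative only): query opcodes are expected to resolve their
# implementation through the table above, e.g.
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)  # -> _NodeQuery
#
# while unknown resource names raise OpPrereqError with errors.ECODE_INVAL.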