root / lib / cmdlib.py @ fcad7225


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201,C0302
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
# C0302: since we have waaaay too many lines in this module
30

    
31
import os
32
import os.path
33
import time
34
import re
35
import platform
36
import logging
37
import copy
38
import OpenSSL
39
import socket
40
import tempfile
41
import shutil
42
import itertools
43
import operator
44

    
45
from ganeti import ssh
46
from ganeti import utils
47
from ganeti import errors
48
from ganeti import hypervisor
49
from ganeti import locking
50
from ganeti import constants
51
from ganeti import objects
52
from ganeti import serializer
53
from ganeti import ssconf
54
from ganeti import uidpool
55
from ganeti import compat
56
from ganeti import masterd
57
from ganeti import netutils
58
from ganeti import query
59
from ganeti import qlang
60
from ganeti import opcodes
61
from ganeti import ht
62

    
63
import ganeti.masterd.instance # pylint: disable-msg=W0611
64

    
65

    
66
class ResultWithJobs:
67
  """Data container for LU results with jobs.
68

69
  Instances of this class returned from L{LogicalUnit.Exec} will be recognized
70
  by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
71
  contained in the C{jobs} attribute and include the job IDs in the opcode
72
  result.
73

74
  """
75
  def __init__(self, jobs, **kwargs):
76
    """Initializes this class.
77

78
    Additional return values can be specified as keyword arguments.
79

80
    @type jobs: list of lists of L{opcodes.OpCode}
81
    @param jobs: A list of lists of opcode objects
82

83
    """
84
    self.jobs = jobs
85
    self.other = kwargs
86
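# Illustrative sketch, not part of the original source: an LU's Exec can hand
# follow-up work back to the job queue by returning ResultWithJobs. The opcode
# used below, the "names_to_start" variable and the "warnings" keyword are
# assumptions made only for this example.
#
#   def Exec(self, feedback_fn):
#     ...
#     return ResultWithJobs([[opcodes.OpInstanceStartup(instance_name=name)]
#                            for name in names_to_start],
#                           warnings=self.warnings)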

    
87

    
88
class LogicalUnit(object):
89
  """Logical Unit base class.
90

91
  Subclasses must follow these rules:
92
    - implement ExpandNames
93
    - implement CheckPrereq (except when tasklets are used)
94
    - implement Exec (except when tasklets are used)
95
    - implement BuildHooksEnv
96
    - implement BuildHooksNodes
97
    - redefine HPATH and HTYPE
98
    - optionally redefine their run requirements:
99
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
100

101
  Note that all commands require root permissions.
102

103
  @ivar dry_run_result: the value (if any) that will be returned to the caller
104
      in dry-run mode (signalled by opcode dry_run parameter)
105

106
  """
107
  HPATH = None
108
  HTYPE = None
109
  REQ_BGL = True
110

    
111
  def __init__(self, processor, op, context, rpc):
112
    """Constructor for LogicalUnit.
113

114
    This needs to be overridden in derived classes in order to check op
115
    validity.
116

117
    """
118
    self.proc = processor
119
    self.op = op
120
    self.cfg = context.cfg
121
    self.glm = context.glm
122
    # readability alias
123
    self.owned_locks = context.glm.list_owned
124
    self.context = context
125
    self.rpc = rpc
126
    # Dicts used to declare locking needs to mcpu
127
    self.needed_locks = None
128
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
129
    self.add_locks = {}
130
    self.remove_locks = {}
131
    # Used to force good behavior when calling helper functions
132
    self.recalculate_locks = {}
133
    # logging
134
    self.Log = processor.Log # pylint: disable-msg=C0103
135
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
136
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
137
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
138
    # support for dry-run
139
    self.dry_run_result = None
140
    # support for generic debug attribute
141
    if (not hasattr(self.op, "debug_level") or
142
        not isinstance(self.op.debug_level, int)):
143
      self.op.debug_level = 0
144

    
145
    # Tasklets
146
    self.tasklets = None
147

    
148
    # Validate opcode parameters and set defaults
149
    self.op.Validate(True)
150

    
151
    self.CheckArguments()
152

    
153
  def CheckArguments(self):
154
    """Check syntactic validity for the opcode arguments.
155

156
    This method is for doing a simple syntactic check and ensure
157
    validity of opcode parameters, without any cluster-related
158
    checks. While the same can be accomplished in ExpandNames and/or
159
    CheckPrereq, doing these separate is better because:
160

161
      - ExpandNames is left as purely a lock-related function
162
      - CheckPrereq is run after we have acquired locks (and possible
163
        waited for them)
164

165
    The function is allowed to change the self.op attribute so that
166
    later methods no longer need to worry about missing parameters.
167

168
    """
169
    pass
170

    
171
  def ExpandNames(self):
172
    """Expand names for this LU.
173

174
    This method is called before starting to execute the opcode, and it should
175
    update all the parameters of the opcode to their canonical form (e.g. a
176
    short node name must be fully expanded after this method has successfully
177
    completed). This way locking, hooks, logging, etc. can work correctly.
178

179
    LUs which implement this method must also populate the self.needed_locks
180
    member, as a dict with lock levels as keys, and a list of needed lock names
181
    as values. Rules:
182

183
      - use an empty dict if you don't need any lock
184
      - if you don't need any lock at a particular level omit that level
185
      - don't put anything for the BGL level
186
      - if you want all locks at a level use locking.ALL_SET as a value
187

188
    If you need to share locks (rather than acquire them exclusively) at one
189
    level you can modify self.share_locks, setting a true value (usually 1) for
190
    that level. By default locks are not shared.
191

192
    This function can also define a list of tasklets, which then will be
193
    executed in order instead of the usual LU-level CheckPrereq and Exec
194
    functions, if those are not defined by the LU.
195

196
    Examples::
197

198
      # Acquire all nodes and one instance
199
      self.needed_locks = {
200
        locking.LEVEL_NODE: locking.ALL_SET,
201
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
202
      }
203
      # Acquire just two nodes
204
      self.needed_locks = {
205
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
206
      }
207
      # Acquire no locks
208
      self.needed_locks = {} # No, you can't leave it to the default value None
209

210
    """
211
    # The implementation of this method is mandatory only if the new LU is
212
    # concurrent, so that old LUs don't need to be changed all at the same
213
    # time.
214
    if self.REQ_BGL:
215
      self.needed_locks = {} # Exclusive LUs don't need locks.
216
    else:
217
      raise NotImplementedError
218

    
219
  def DeclareLocks(self, level):
220
    """Declare LU locking needs for a level
221

222
    While most LUs can just declare their locking needs at ExpandNames time,
223
    sometimes there's the need to calculate some locks after having acquired
224
    the ones before. This function is called just before acquiring locks at a
225
    particular level, but after acquiring the ones at lower levels, and permits
226
    such calculations. It can be used to modify self.needed_locks, and by
227
    default it does nothing.
228

229
    This function is only called if you have something already set in
230
    self.needed_locks for the level.
231

232
    @param level: Locking level which is going to be locked
233
    @type level: member of ganeti.locking.LEVELS
234

235
    """
236

    
237
  def CheckPrereq(self):
238
    """Check prerequisites for this LU.
239

240
    This method should check that the prerequisites for the execution
241
    of this LU are fulfilled. It can do internode communication, but
242
    it should be idempotent - no cluster or system changes are
243
    allowed.
244

245
    The method should raise errors.OpPrereqError in case something is
246
    not fulfilled. Its return value is ignored.
247

248
    This method should also update all the parameters of the opcode to
249
    their canonical form if it hasn't been done by ExpandNames before.
250

251
    """
252
    if self.tasklets is not None:
253
      for (idx, tl) in enumerate(self.tasklets):
254
        logging.debug("Checking prerequisites for tasklet %s/%s",
255
                      idx + 1, len(self.tasklets))
256
        tl.CheckPrereq()
257
    else:
258
      pass
259

    
260
  def Exec(self, feedback_fn):
261
    """Execute the LU.
262

263
    This method should implement the actual work. It should raise
264
    errors.OpExecError for failures that are somewhat dealt with in
265
    code, or expected.
266

267
    """
268
    if self.tasklets is not None:
269
      for (idx, tl) in enumerate(self.tasklets):
270
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
271
        tl.Exec(feedback_fn)
272
    else:
273
      raise NotImplementedError
274

    
275
  def BuildHooksEnv(self):
276
    """Build hooks environment for this LU.
277

278
    @rtype: dict
279
    @return: Dictionary containing the environment that will be used for
280
      running the hooks for this LU. The keys of the dict must not be prefixed
281
      with "GANETI_"--that'll be added by the hooks runner. The hooks runner
282
      will extend the environment with additional variables. If no environment
283
      should be defined, an empty dictionary should be returned (not C{None}).
284
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
285
      will not be called.
286

287
    """
288
    raise NotImplementedError
289

    
290
  def BuildHooksNodes(self):
291
    """Build list of nodes to run LU's hooks.
292

293
    @rtype: tuple; (list, list)
294
    @return: Tuple containing a list of node names on which the hook
295
      should run before the execution and a list of node names on which the
296
      hook should run after the execution. No nodes should be returned as an
297
      empty list (and not None).
298
    @note: If the C{HPATH} attribute of the LU class is C{None}, this function
299
      will not be called.
300

301
    """
302
    raise NotImplementedError
303

    
304
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
305
    """Notify the LU about the results of its hooks.
306

307
    This method is called every time a hooks phase is executed, and notifies
308
    the Logical Unit about the hooks' result. The LU can then use it to alter
309
    its result based on the hooks.  By default the method does nothing and the
310
    previous result is passed back unchanged but any LU can define it if it
311
    wants to use the local cluster hook-scripts somehow.
312

313
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
314
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
315
    @param hook_results: the results of the multi-node hooks rpc call
316
    @param feedback_fn: function used to send feedback back to the caller
317
    @param lu_result: the previous Exec result this LU had, or None
318
        in the PRE phase
319
    @return: the new Exec result, based on the previous result
320
        and hook results
321

322
    """
323
    # API must be kept, thus we ignore the "unused argument" and
324
    # "could be a function" warnings
325
    # pylint: disable-msg=W0613,R0201
326
    return lu_result
327

    
328
  def _ExpandAndLockInstance(self):
329
    """Helper function to expand and lock an instance.
330

331
    Many LUs that work on an instance take its name in self.op.instance_name
332
    and need to expand it and then declare the expanded name for locking. This
333
    function does it, and then updates self.op.instance_name to the expanded
334
    name. It also initializes needed_locks as a dict, if this hasn't been done
335
    before.
336

337
    """
338
    if self.needed_locks is None:
339
      self.needed_locks = {}
340
    else:
341
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
342
        "_ExpandAndLockInstance called with instance-level locks set"
343
    self.op.instance_name = _ExpandInstanceName(self.cfg,
344
                                                self.op.instance_name)
345
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346

    
347
  def _LockInstancesNodes(self, primary_only=False):
348
    """Helper function to declare instances' nodes for locking.
349

350
    This function should be called after locking one or more instances to lock
351
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
352
    with all primary or secondary nodes for instances already locked and
353
    present in self.needed_locks[locking.LEVEL_INSTANCE].
354

355
    It should be called from DeclareLocks, and for safety only works if
356
    self.recalculate_locks[locking.LEVEL_NODE] is set.
357

358
    In the future it may grow parameters to just lock some instance's nodes, or
359
    to just lock primaries or secondary nodes, if needed.
360

361
    It should be called in DeclareLocks in a way similar to::
362

363
      if level == locking.LEVEL_NODE:
364
        self._LockInstancesNodes()
365

366
    @type primary_only: boolean
367
    @param primary_only: only lock primary nodes of locked instances
368

369
    """
370
    assert locking.LEVEL_NODE in self.recalculate_locks, \
371
      "_LockInstancesNodes helper function called with no nodes to recalculate"
372

    
373
    # TODO: check if we've really been called with the instance locks held
374

    
375
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
376
    # future we might want to have different behaviors depending on the value
377
    # of self.recalculate_locks[locking.LEVEL_NODE]
378
    wanted_nodes = []
379
    locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
380
    for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
381
      wanted_nodes.append(instance.primary_node)
382
      if not primary_only:
383
        wanted_nodes.extend(instance.secondary_nodes)
384

    
385
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
386
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
387
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
388
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389

    
390
    del self.recalculate_locks[locking.LEVEL_NODE]
391
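# Illustrative sketch, not part of the original source: LUs that use
# _LockInstancesNodes typically defer node locks in ExpandNames and
# recalculate them in DeclareLocks. Method bodies below are a minimal example
# of that pattern, not any specific LU:
#
#   def ExpandNames(self):
#     self._ExpandAndLockInstance()
#     self.needed_locks[locking.LEVEL_NODE] = []
#     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#   def DeclareLocks(self, level):
#     if level == locking.LEVEL_NODE:
#       self._LockInstancesNodes()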

    
392

    
393
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
394
  """Simple LU which runs no hooks.
395

396
  This LU is intended as a parent for other LogicalUnits which will
397
  run no hooks, in order to reduce duplicate code.
398

399
  """
400
  HPATH = None
401
  HTYPE = None
402

    
403
  def BuildHooksEnv(self):
404
    """Empty BuildHooksEnv for NoHooksLu.
405

406
    This just raises an error.
407

408
    """
409
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
410

    
411
  def BuildHooksNodes(self):
412
    """Empty BuildHooksNodes for NoHooksLU.
413

414
    """
415
    raise AssertionError("BuildHooksNodes called for NoHooksLU")
416

    
417

    
418
class Tasklet:
419
  """Tasklet base class.
420

421
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
422
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
423
  tasklets know nothing about locks.
424

425
  Subclasses must follow these rules:
426
    - Implement CheckPrereq
427
    - Implement Exec
428

429
  """
430
  def __init__(self, lu):
431
    self.lu = lu
432

    
433
    # Shortcuts
434
    self.cfg = lu.cfg
435
    self.rpc = lu.rpc
436

    
437
  def CheckPrereq(self):
438
    """Check prerequisites for this tasklets.
439

440
    This method should check whether the prerequisites for the execution of
441
    this tasklet are fulfilled. It can do internode communication, but it
442
    should be idempotent - no cluster or system changes are allowed.
443

444
    The method should raise errors.OpPrereqError in case something is not
445
    fulfilled. Its return value is ignored.
446

447
    This method should also update all parameters to their canonical form if it
448
    hasn't been done before.
449

450
    """
451
    pass
452

    
453
  def Exec(self, feedback_fn):
454
    """Execute the tasklet.
455

456
    This method should implement the actual work. It should raise
457
    errors.OpExecError for failures that are somewhat dealt with in code, or
458
    expected.
459

460
    """
461
    raise NotImplementedError
462

    
463

    
464
class _QueryBase:
465
  """Base for query utility classes.
466

467
  """
468
  #: Attribute holding field definitions
469
  FIELDS = None
470

    
471
  def __init__(self, filter_, fields, use_locking):
472
    """Initializes this class.
473

474
    """
475
    self.use_locking = use_locking
476

    
477
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
478
                             namefield="name")
479
    self.requested_data = self.query.RequestedData()
480
    self.names = self.query.RequestedNames()
481

    
482
    # Sort only if no names were requested
483
    self.sort_by_name = not self.names
484

    
485
    self.do_locking = None
486
    self.wanted = None
487

    
488
  def _GetNames(self, lu, all_names, lock_level):
489
    """Helper function to determine names asked for in the query.
490

491
    """
492
    if self.do_locking:
493
      names = lu.owned_locks(lock_level)
494
    else:
495
      names = all_names
496

    
497
    if self.wanted == locking.ALL_SET:
498
      assert not self.names
499
      # caller didn't specify names, so ordering is not important
500
      return utils.NiceSort(names)
501

    
502
    # caller specified names and we must keep the same order
503
    assert self.names
504
    assert not self.do_locking or lu.glm.is_owned(lock_level)
505

    
506
    missing = set(self.wanted).difference(names)
507
    if missing:
508
      raise errors.OpExecError("Some items were removed before retrieving"
509
                               " their data: %s" % missing)
510

    
511
    # Return expanded names
512
    return self.wanted
513

    
514
  def ExpandNames(self, lu):
515
    """Expand names for this query.
516

517
    See L{LogicalUnit.ExpandNames}.
518

519
    """
520
    raise NotImplementedError()
521

    
522
  def DeclareLocks(self, lu, level):
523
    """Declare locks for this query.
524

525
    See L{LogicalUnit.DeclareLocks}.
526

527
    """
528
    raise NotImplementedError()
529

    
530
  def _GetQueryData(self, lu):
531
    """Collects all data for this query.
532

533
    @return: Query data object
534

535
    """
536
    raise NotImplementedError()
537

    
538
  def NewStyleQuery(self, lu):
539
    """Collect data and execute query.
540

541
    """
542
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
543
                                  sort_by_name=self.sort_by_name)
544

    
545
  def OldStyleQuery(self, lu):
546
    """Collect data and execute query.
547

548
    """
549
    return self.query.OldStyleQuery(self._GetQueryData(lu),
550
                                    sort_by_name=self.sort_by_name)
551

    
552

    
553
def _ShareAll():
554
  """Returns a dict declaring all lock levels shared.
555

556
  """
557
  return dict.fromkeys(locking.LEVELS, 1)
558

    
559

    
560
def _CheckInstanceNodeGroups(cfg, instance_name, owned_groups):
561
  """Checks if the owned node groups are still correct for an instance.
562

563
  @type cfg: L{config.ConfigWriter}
564
  @param cfg: The cluster configuration
565
  @type instance_name: string
566
  @param instance_name: Instance name
567
  @type owned_groups: set or frozenset
568
  @param owned_groups: List of currently owned node groups
569

570
  """
571
  inst_groups = cfg.GetInstanceNodeGroups(instance_name)
572

    
573
  if not owned_groups.issuperset(inst_groups):
574
    raise errors.OpPrereqError("Instance %s's node groups changed since"
575
                               " locks were acquired, current groups are"
576
                               " are '%s', owning groups '%s'; retry the"
577
                               " operation" %
578
                               (instance_name,
579
                                utils.CommaJoin(inst_groups),
580
                                utils.CommaJoin(owned_groups)),
581
                               errors.ECODE_STATE)
582

    
583
  return inst_groups
584
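# Illustrative example, not part of the original source: a typical caller
# re-checks its node group locks right after acquiring them, e.g.:
#
#   owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
#   _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)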

    
585

    
586
def _CheckNodeGroupInstances(cfg, group_uuid, owned_instances):
587
  """Checks if the instances in a node group are still correct.
588

589
  @type cfg: L{config.ConfigWriter}
590
  @param cfg: The cluster configuration
591
  @type group_uuid: string
592
  @param group_uuid: Node group UUID
593
  @type owned_instances: set or frozenset
594
  @param owned_instances: List of currently owned instances
595

596
  """
597
  wanted_instances = cfg.GetNodeGroupInstances(group_uuid)
598
  if owned_instances != wanted_instances:
599
    raise errors.OpPrereqError("Instances in node group '%s' changed since"
600
                               " locks were acquired, wanted '%s', have '%s';"
601
                               " retry the operation" %
602
                               (group_uuid,
603
                                utils.CommaJoin(wanted_instances),
604
                                utils.CommaJoin(owned_instances)),
605
                               errors.ECODE_STATE)
606

    
607
  return wanted_instances
608

    
609

    
610
def _SupportsOob(cfg, node):
611
  """Tells if node supports OOB.
612

613
  @type cfg: L{config.ConfigWriter}
614
  @param cfg: The cluster configuration
615
  @type node: L{objects.Node}
616
  @param node: The node
617
  @return: The OOB script if supported or an empty string otherwise
618

619
  """
620
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
621

    
622

    
623
def _GetWantedNodes(lu, nodes):
624
  """Returns list of checked and expanded node names.
625

626
  @type lu: L{LogicalUnit}
627
  @param lu: the logical unit on whose behalf we execute
628
  @type nodes: list
629
  @param nodes: list of node names or None for all nodes
630
  @rtype: list
631
  @return: the list of nodes, sorted
632
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
633

634
  """
635
  if nodes:
636
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
637

    
638
  return utils.NiceSort(lu.cfg.GetNodeList())
639

    
640

    
641
def _GetWantedInstances(lu, instances):
642
  """Returns list of checked and expanded instance names.
643

644
  @type lu: L{LogicalUnit}
645
  @param lu: the logical unit on whose behalf we execute
646
  @type instances: list
647
  @param instances: list of instance names or None for all instances
648
  @rtype: list
649
  @return: the list of instances, sorted
650
  @raise errors.OpPrereqError: if the instances parameter is wrong type
651
  @raise errors.OpPrereqError: if any of the passed instances is not found
652

653
  """
654
  if instances:
655
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
656
  else:
657
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
658
  return wanted
659

    
660

    
661
def _GetUpdatedParams(old_params, update_dict,
662
                      use_default=True, use_none=False):
663
  """Return the new version of a parameter dictionary.
664

665
  @type old_params: dict
666
  @param old_params: old parameters
667
  @type update_dict: dict
668
  @param update_dict: dict containing new parameter values, or
669
      constants.VALUE_DEFAULT to reset the parameter to its default
670
      value
671
  @type use_default: boolean
672
  @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
673
      values as 'to be deleted' values
674
  @type use_none: boolean
675
  @param use_none: whether to recognise C{None} values as 'to be
676
      deleted' values
677
  @rtype: dict
678
  @return: the new parameter dictionary
679

680
  """
681
  params_copy = copy.deepcopy(old_params)
682
  for key, val in update_dict.iteritems():
683
    if ((use_default and val == constants.VALUE_DEFAULT) or
684
        (use_none and val is None)):
685
      try:
686
        del params_copy[key]
687
      except KeyError:
688
        pass
689
    else:
690
      params_copy[key] = val
691
  return params_copy
692
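# Illustrative example, not part of the original source, showing the merge
# semantics of _GetUpdatedParams (constants.VALUE_DEFAULT marks a key to be
# reset, i.e. removed from the resulting dict):
#
#   >>> _GetUpdatedParams({"vcpus": 2, "memory": 512},
#   ...                   {"memory": constants.VALUE_DEFAULT,
#   ...                    "auto_balance": True})
#   {'vcpus': 2, 'auto_balance': True}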

    
693

    
694
def _ReleaseLocks(lu, level, names=None, keep=None):
695
  """Releases locks owned by an LU.
696

697
  @type lu: L{LogicalUnit}
698
  @param lu: the logical unit owning the locks
  @type level: member of locking.LEVELS
  @param level: Lock level
699
  @type names: list or None
700
  @param names: Names of locks to release
701
  @type keep: list or None
702
  @param keep: Names of locks to retain
703

704
  """
705
  assert not (keep is not None and names is not None), \
706
         "Only one of the 'names' and the 'keep' parameters can be given"
707

    
708
  if names is not None:
709
    should_release = names.__contains__
710
  elif keep:
711
    should_release = lambda name: name not in keep
712
  else:
713
    should_release = None
714

    
715
  if should_release:
716
    retain = []
717
    release = []
718

    
719
    # Determine which locks to release
720
    for name in lu.owned_locks(level):
721
      if should_release(name):
722
        release.append(name)
723
      else:
724
        retain.append(name)
725

    
726
    assert len(lu.owned_locks(level)) == (len(retain) + len(release))
727

    
728
    # Release just some locks
729
    lu.glm.release(level, names=release)
730

    
731
    assert frozenset(lu.owned_locks(level)) == frozenset(retain)
732
  else:
733
    # Release everything
734
    lu.glm.release(level)
735

    
736
    assert not lu.glm.is_owned(level), "No locks should be owned"
737
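# Illustrative example, not part of the original source: after narrowing
# self.needed_locks at a level, an LU can drop the node locks it no longer
# needs while keeping the rest:
#
#   _ReleaseLocks(self, locking.LEVEL_NODE,
#                 keep=self.needed_locks[locking.LEVEL_NODE])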

    
738

    
739
def _MapInstanceDisksToNodes(instances):
740
  """Creates a map from (node, volume) to instance name.
741

742
  @type instances: list of L{objects.Instance}
743
  @rtype: dict; tuple of (node name, volume name) as key, instance name as value
744

745
  """
746
  return dict(((node, vol), inst.name)
747
              for inst in instances
748
              for (node, vols) in inst.MapLVsByNode().items()
749
              for vol in vols)
750
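# Illustrative example, not part of the original source: the resulting mapping
# has (node name, volume name) tuples as keys and instance names as values;
# host and LV names below are hypothetical:
#
#   {("node1.example.com", "xenvg/disk0"): "inst1.example.com",
#    ("node2.example.com", "xenvg/disk0"): "inst1.example.com"}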

    
751

    
752
def _RunPostHook(lu, node_name):
753
  """Runs the post-hook for an opcode on a single node.
754

755
  """
756
  hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
757
  try:
758
    hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
759
  except:
760
    # pylint: disable-msg=W0702
761
    lu.LogWarning("Errors occurred running hooks on %s" % node_name)
762

    
763

    
764
def _CheckOutputFields(static, dynamic, selected):
765
  """Checks whether all selected fields are valid.
766

767
  @type static: L{utils.FieldSet}
768
  @param static: static fields set
769
  @type dynamic: L{utils.FieldSet}
770
  @param dynamic: dynamic fields set
771

772
  """
773
  f = utils.FieldSet()
774
  f.Extend(static)
775
  f.Extend(dynamic)
776

    
777
  delta = f.NonMatching(selected)
778
  if delta:
779
    raise errors.OpPrereqError("Unknown output fields selected: %s"
780
                               % ",".join(delta), errors.ECODE_INVAL)
781

    
782

    
783
def _CheckGlobalHvParams(params):
784
  """Validates that given hypervisor params are not global ones.
785

786
  This will ensure that instances don't get customised versions of
787
  global params.
788

789
  """
790
  used_globals = constants.HVC_GLOBALS.intersection(params)
791
  if used_globals:
792
    msg = ("The following hypervisor parameters are global and cannot"
793
           " be customized at instance level, please modify them at"
794
           " cluster level: %s" % utils.CommaJoin(used_globals))
795
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
796

    
797

    
798
def _CheckNodeOnline(lu, node, msg=None):
799
  """Ensure that a given node is online.
800

801
  @param lu: the LU on behalf of which we make the check
802
  @param node: the node to check
803
  @param msg: if passed, should be a message to replace the default one
804
  @raise errors.OpPrereqError: if the node is offline
805

806
  """
807
  if msg is None:
808
    msg = "Can't use offline node"
809
  if lu.cfg.GetNodeInfo(node).offline:
810
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
811

    
812

    
813
def _CheckNodeNotDrained(lu, node):
814
  """Ensure that a given node is not drained.
815

816
  @param lu: the LU on behalf of which we make the check
817
  @param node: the node to check
818
  @raise errors.OpPrereqError: if the node is drained
819

820
  """
821
  if lu.cfg.GetNodeInfo(node).drained:
822
    raise errors.OpPrereqError("Can't use drained node %s" % node,
823
                               errors.ECODE_STATE)
824

    
825

    
826
def _CheckNodeVmCapable(lu, node):
827
  """Ensure that a given node is vm capable.
828

829
  @param lu: the LU on behalf of which we make the check
830
  @param node: the node to check
831
  @raise errors.OpPrereqError: if the node is not vm capable
832

833
  """
834
  if not lu.cfg.GetNodeInfo(node).vm_capable:
835
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
836
                               errors.ECODE_STATE)
837

    
838

    
839
def _CheckNodeHasOS(lu, node, os_name, force_variant):
840
  """Ensure that a node supports a given OS.
841

842
  @param lu: the LU on behalf of which we make the check
843
  @param node: the node to check
844
  @param os_name: the OS to query about
845
  @param force_variant: whether to ignore variant errors
846
  @raise errors.OpPrereqError: if the node is not supporting the OS
847

848
  """
849
  result = lu.rpc.call_os_get(node, os_name)
850
  result.Raise("OS '%s' not in supported OS list for node %s" %
851
               (os_name, node),
852
               prereq=True, ecode=errors.ECODE_INVAL)
853
  if not force_variant:
854
    _CheckOSVariant(result.payload, os_name)
855

    
856

    
857
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
858
  """Ensure that a node has the given secondary ip.
859

860
  @type lu: L{LogicalUnit}
861
  @param lu: the LU on behalf of which we make the check
862
  @type node: string
863
  @param node: the node to check
864
  @type secondary_ip: string
865
  @param secondary_ip: the ip to check
866
  @type prereq: boolean
867
  @param prereq: whether to throw a prerequisite or an execute error
868
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
869
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
870

871
  """
872
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
873
  result.Raise("Failure checking secondary ip on node %s" % node,
874
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
875
  if not result.payload:
876
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
877
           " please fix and re-run this command" % secondary_ip)
878
    if prereq:
879
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
880
    else:
881
      raise errors.OpExecError(msg)
882

    
883

    
884
def _GetClusterDomainSecret():
885
  """Reads the cluster domain secret.
886

887
  """
888
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
889
                               strict=True)
890

    
891

    
892
def _CheckInstanceDown(lu, instance, reason):
893
  """Ensure that an instance is not running."""
894
  if instance.admin_up:
895
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
896
                               (instance.name, reason), errors.ECODE_STATE)
897

    
898
  pnode = instance.primary_node
899
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
900
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
901
              prereq=True, ecode=errors.ECODE_ENVIRON)
902

    
903
  if instance.name in ins_l.payload:
904
    raise errors.OpPrereqError("Instance %s is running, %s" %
905
                               (instance.name, reason), errors.ECODE_STATE)
906

    
907

    
908
def _ExpandItemName(fn, name, kind):
909
  """Expand an item name.
910

911
  @param fn: the function to use for expansion
912
  @param name: requested item name
913
  @param kind: text description ('Node' or 'Instance')
914
  @return: the resolved (full) name
915
  @raise errors.OpPrereqError: if the item is not found
916

917
  """
918
  full_name = fn(name)
919
  if full_name is None:
920
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
921
                               errors.ECODE_NOENT)
922
  return full_name
923

    
924

    
925
def _ExpandNodeName(cfg, name):
926
  """Wrapper over L{_ExpandItemName} for nodes."""
927
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
928

    
929

    
930
def _ExpandInstanceName(cfg, name):
931
  """Wrapper over L{_ExpandItemName} for instance."""
932
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
933
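# Illustrative example, not part of the original source: expansion turns a
# short name into the fully-qualified name known to the configuration, or
# raises errors.OpPrereqError if it is unknown (names are hypothetical):
#
#   >>> _ExpandNodeName(self.cfg, "node1")
#   'node1.example.com'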

    
934

    
935
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
936
                          memory, vcpus, nics, disk_template, disks,
937
                          bep, hvp, hypervisor_name, tags):
938
  """Builds instance related env variables for hooks
939

940
  This builds the hook environment from individual variables.
941

942
  @type name: string
943
  @param name: the name of the instance
944
  @type primary_node: string
945
  @param primary_node: the name of the instance's primary node
946
  @type secondary_nodes: list
947
  @param secondary_nodes: list of secondary nodes as strings
948
  @type os_type: string
949
  @param os_type: the name of the instance's OS
950
  @type status: boolean
951
  @param status: the should_run status of the instance
952
  @type memory: string
953
  @param memory: the memory size of the instance
954
  @type vcpus: string
955
  @param vcpus: the count of VCPUs the instance has
956
  @type nics: list
957
  @param nics: list of tuples (ip, mac, mode, link) representing
958
      the NICs the instance has
959
  @type disk_template: string
960
  @param disk_template: the disk template of the instance
961
  @type disks: list
962
  @param disks: the list of (size, mode) pairs
963
  @type bep: dict
964
  @param bep: the backend parameters for the instance
965
  @type hvp: dict
966
  @param hvp: the hypervisor parameters for the instance
967
  @type hypervisor_name: string
968
  @param hypervisor_name: the hypervisor for the instance
969
  @type tags: list
970
  @param tags: list of instance tags as strings
971
  @rtype: dict
972
  @return: the hook environment for this instance
973

974
  """
975
  if status:
976
    str_status = "up"
977
  else:
978
    str_status = "down"
979
  env = {
980
    "OP_TARGET": name,
981
    "INSTANCE_NAME": name,
982
    "INSTANCE_PRIMARY": primary_node,
983
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
984
    "INSTANCE_OS_TYPE": os_type,
985
    "INSTANCE_STATUS": str_status,
986
    "INSTANCE_MEMORY": memory,
987
    "INSTANCE_VCPUS": vcpus,
988
    "INSTANCE_DISK_TEMPLATE": disk_template,
989
    "INSTANCE_HYPERVISOR": hypervisor_name,
990
  }
991

    
992
  if nics:
993
    nic_count = len(nics)
994
    for idx, (ip, mac, mode, link) in enumerate(nics):
995
      if ip is None:
996
        ip = ""
997
      env["INSTANCE_NIC%d_IP" % idx] = ip
998
      env["INSTANCE_NIC%d_MAC" % idx] = mac
999
      env["INSTANCE_NIC%d_MODE" % idx] = mode
1000
      env["INSTANCE_NIC%d_LINK" % idx] = link
1001
      if mode == constants.NIC_MODE_BRIDGED:
1002
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
1003
  else:
1004
    nic_count = 0
1005

    
1006
  env["INSTANCE_NIC_COUNT"] = nic_count
1007

    
1008
  if disks:
1009
    disk_count = len(disks)
1010
    for idx, (size, mode) in enumerate(disks):
1011
      env["INSTANCE_DISK%d_SIZE" % idx] = size
1012
      env["INSTANCE_DISK%d_MODE" % idx] = mode
1013
  else:
1014
    disk_count = 0
1015

    
1016
  env["INSTANCE_DISK_COUNT"] = disk_count
1017

    
1018
  if not tags:
1019
    tags = []
1020

    
1021
  env["INSTANCE_TAGS"] = " ".join(tags)
1022

    
1023
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
1024
    for key, value in source.items():
1025
      env["INSTANCE_%s_%s" % (kind, key)] = value
1026

    
1027
  return env
1028
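# Illustrative example, not part of the original source: for an instance with
# one NIC and one disk the returned environment contains keys such as the
# following (all values are hypothetical):
#
#   OP_TARGET=inst1.example.com        INSTANCE_NIC_COUNT=1
#   INSTANCE_PRIMARY=node1.example.com INSTANCE_NIC0_MAC=aa:00:00:11:22:33
#   INSTANCE_STATUS=up                 INSTANCE_DISK_COUNT=1
#   INSTANCE_MEMORY=512                INSTANCE_DISK0_SIZE=10240
#   plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend or hypervisor
#   parameter (e.g. INSTANCE_BE_memory=512).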

    
1029

    
1030
def _NICListToTuple(lu, nics):
1031
  """Build a list of nic information tuples.
1032

1033
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
1034
  value in LUInstanceQueryData.
1035

1036
  @type lu:  L{LogicalUnit}
1037
  @param lu: the logical unit on whose behalf we execute
1038
  @type nics: list of L{objects.NIC}
1039
  @param nics: list of nics to convert to hooks tuples
1040

1041
  """
1042
  hooks_nics = []
1043
  cluster = lu.cfg.GetClusterInfo()
1044
  for nic in nics:
1045
    ip = nic.ip
1046
    mac = nic.mac
1047
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
1048
    mode = filled_params[constants.NIC_MODE]
1049
    link = filled_params[constants.NIC_LINK]
1050
    hooks_nics.append((ip, mac, mode, link))
1051
  return hooks_nics
1052

    
1053

    
1054
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1055
  """Builds instance related env variables for hooks from an object.
1056

1057
  @type lu: L{LogicalUnit}
1058
  @param lu: the logical unit on whose behalf we execute
1059
  @type instance: L{objects.Instance}
1060
  @param instance: the instance for which we should build the
1061
      environment
1062
  @type override: dict
1063
  @param override: dictionary with key/values that will override
1064
      our values
1065
  @rtype: dict
1066
  @return: the hook environment dictionary
1067

1068
  """
1069
  cluster = lu.cfg.GetClusterInfo()
1070
  bep = cluster.FillBE(instance)
1071
  hvp = cluster.FillHV(instance)
1072
  args = {
1073
    "name": instance.name,
1074
    "primary_node": instance.primary_node,
1075
    "secondary_nodes": instance.secondary_nodes,
1076
    "os_type": instance.os,
1077
    "status": instance.admin_up,
1078
    "memory": bep[constants.BE_MEMORY],
1079
    "vcpus": bep[constants.BE_VCPUS],
1080
    "nics": _NICListToTuple(lu, instance.nics),
1081
    "disk_template": instance.disk_template,
1082
    "disks": [(disk.size, disk.mode) for disk in instance.disks],
1083
    "bep": bep,
1084
    "hvp": hvp,
1085
    "hypervisor_name": instance.hypervisor,
1086
    "tags": instance.tags,
1087
  }
1088
  if override:
1089
    args.update(override)
1090
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1091

    
1092

    
1093
def _AdjustCandidatePool(lu, exceptions):
1094
  """Adjust the candidate pool after node operations.
1095

1096
  """
1097
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1098
  if mod_list:
1099
    lu.LogInfo("Promoted nodes to master candidate role: %s",
1100
               utils.CommaJoin(node.name for node in mod_list))
1101
    for name in mod_list:
1102
      lu.context.ReaddNode(name)
1103
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1104
  if mc_now > mc_max:
1105
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1106
               (mc_now, mc_max))
1107

    
1108

    
1109
def _DecideSelfPromotion(lu, exceptions=None):
1110
  """Decide whether I should promote myself as a master candidate.
1111

1112
  """
1113
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1114
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1115
  # the new node will increase mc_max with one, so:
1116
  mc_should = min(mc_should + 1, cp_size)
1117
  return mc_now < mc_should
1118

    
1119

    
1120
def _CheckNicsBridgesExist(lu, target_nics, target_node):
1121
  """Check that the brigdes needed by a list of nics exist.
1122

1123
  """
1124
  cluster = lu.cfg.GetClusterInfo()
1125
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1126
  brlist = [params[constants.NIC_LINK] for params in paramslist
1127
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1128
  if brlist:
1129
    result = lu.rpc.call_bridges_exist(target_node, brlist)
1130
    result.Raise("Error checking bridges on destination node '%s'" %
1131
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1132

    
1133

    
1134
def _CheckInstanceBridgesExist(lu, instance, node=None):
1135
  """Check that the brigdes needed by an instance exist.
1136

1137
  """
1138
  if node is None:
1139
    node = instance.primary_node
1140
  _CheckNicsBridgesExist(lu, instance.nics, node)
1141

    
1142

    
1143
def _CheckOSVariant(os_obj, name):
1144
  """Check whether an OS name conforms to the os variants specification.
1145

1146
  @type os_obj: L{objects.OS}
1147
  @param os_obj: OS object to check
1148
  @type name: string
1149
  @param name: OS name passed by the user, to check for validity
1150

1151
  """
1152
  variant = objects.OS.GetVariant(name)
1153
  if not os_obj.supported_variants:
1154
    if variant:
1155
      raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1156
                                 " passed)" % (os_obj.name, variant),
1157
                                 errors.ECODE_INVAL)
1158
    return
1159
  if not variant:
1160
    raise errors.OpPrereqError("OS name must include a variant",
1161
                               errors.ECODE_INVAL)
1162

    
1163
  if variant not in os_obj.supported_variants:
1164
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1165

    
1166

    
1167
def _GetNodeInstancesInner(cfg, fn):
1168
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1169

    
1170

    
1171
def _GetNodeInstances(cfg, node_name):
1172
  """Returns a list of all primary and secondary instances on a node.
1173

1174
  """
1175

    
1176
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1177

    
1178

    
1179
def _GetNodePrimaryInstances(cfg, node_name):
1180
  """Returns primary instances on a node.
1181

1182
  """
1183
  return _GetNodeInstancesInner(cfg,
1184
                                lambda inst: node_name == inst.primary_node)
1185

    
1186

    
1187
def _GetNodeSecondaryInstances(cfg, node_name):
1188
  """Returns secondary instances on a node.
1189

1190
  """
1191
  return _GetNodeInstancesInner(cfg,
1192
                                lambda inst: node_name in inst.secondary_nodes)
1193

    
1194

    
1195
def _GetStorageTypeArgs(cfg, storage_type):
1196
  """Returns the arguments for a storage type.
1197

1198
  """
1199
  # Special case for file storage
1200
  if storage_type == constants.ST_FILE:
1201
    # storage.FileStorage wants a list of storage directories
1202
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1203

    
1204
  return []
1205

    
1206

    
1207
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1208
  faulty = []
1209

    
1210
  for dev in instance.disks:
1211
    cfg.SetDiskID(dev, node_name)
1212

    
1213
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1214
  result.Raise("Failed to get disk status from node %s" % node_name,
1215
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1216

    
1217
  for idx, bdev_status in enumerate(result.payload):
1218
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1219
      faulty.append(idx)
1220

    
1221
  return faulty
1222

    
1223

    
1224
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1225
  """Check the sanity of iallocator and node arguments and use the
1226
  cluster-wide iallocator if appropriate.
1227

1228
  Check that at most one of (iallocator, node) is specified. If none is
1229
  specified, then the LU's opcode's iallocator slot is filled with the
1230
  cluster-wide default iallocator.
1231

1232
  @type iallocator_slot: string
1233
  @param iallocator_slot: the name of the opcode iallocator slot
1234
  @type node_slot: string
1235
  @param node_slot: the name of the opcode target node slot
1236

1237
  """
1238
  node = getattr(lu.op, node_slot, None)
1239
  iallocator = getattr(lu.op, iallocator_slot, None)
1240

    
1241
  if node is not None and iallocator is not None:
1242
    raise errors.OpPrereqError("Do not specify both, iallocator and node",
1243
                               errors.ECODE_INVAL)
1244
  elif node is None and iallocator is None:
1245
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1246
    if default_iallocator:
1247
      setattr(lu.op, iallocator_slot, default_iallocator)
1248
    else:
1249
      raise errors.OpPrereqError("No iallocator or node given and no"
1250
                                 " cluster-wide default iallocator found;"
1251
                                 " please specify either an iallocator or a"
1252
                                 " node, or set a cluster-wide default"
1253
                                 " iallocator")
1254
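# Illustrative example, not part of the original source: an LU whose opcode
# has an "iallocator" and a "pnode" slot would validate them with the call
# below (the slot names are assumptions made for the example):
#
#   _CheckIAllocatorOrNode(self, "iallocator", "pnode")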

    
1255

    
1256
def _GetDefaultIAllocator(cfg, iallocator):
1257
  """Decides on which iallocator to use.
1258

1259
  @type cfg: L{config.ConfigWriter}
1260
  @param cfg: Cluster configuration object
1261
  @type iallocator: string or None
1262
  @param iallocator: Iallocator specified in opcode
1263
  @rtype: string
1264
  @return: Iallocator name
1265

1266
  """
1267
  if not iallocator:
1268
    # Use default iallocator
1269
    iallocator = cfg.GetDefaultIAllocator()
1270

    
1271
  if not iallocator:
1272
    raise errors.OpPrereqError("No iallocator was specified, neither in the"
1273
                               " opcode nor as a cluster-wide default",
1274
                               errors.ECODE_INVAL)
1275

    
1276
  return iallocator
1277

    
1278

    
1279
class LUClusterPostInit(LogicalUnit):
1280
  """Logical unit for running hooks after cluster initialization.
1281

1282
  """
1283
  HPATH = "cluster-init"
1284
  HTYPE = constants.HTYPE_CLUSTER
1285

    
1286
  def BuildHooksEnv(self):
1287
    """Build hooks env.
1288

1289
    """
1290
    return {
1291
      "OP_TARGET": self.cfg.GetClusterName(),
1292
      }
1293

    
1294
  def BuildHooksNodes(self):
1295
    """Build hooks nodes.
1296

1297
    """
1298
    return ([], [self.cfg.GetMasterNode()])
1299

    
1300
  def Exec(self, feedback_fn):
1301
    """Nothing to do.
1302

1303
    """
1304
    return True
1305

    
1306

    
1307
class LUClusterDestroy(LogicalUnit):
1308
  """Logical unit for destroying the cluster.
1309

1310
  """
1311
  HPATH = "cluster-destroy"
1312
  HTYPE = constants.HTYPE_CLUSTER
1313

    
1314
  def BuildHooksEnv(self):
1315
    """Build hooks env.
1316

1317
    """
1318
    return {
1319
      "OP_TARGET": self.cfg.GetClusterName(),
1320
      }
1321

    
1322
  def BuildHooksNodes(self):
1323
    """Build hooks nodes.
1324

1325
    """
1326
    return ([], [])
1327

    
1328
  def CheckPrereq(self):
1329
    """Check prerequisites.
1330

1331
    This checks whether the cluster is empty.
1332

1333
    Any errors are signaled by raising errors.OpPrereqError.
1334

1335
    """
1336
    master = self.cfg.GetMasterNode()
1337

    
1338
    nodelist = self.cfg.GetNodeList()
1339
    if len(nodelist) != 1 or nodelist[0] != master:
1340
      raise errors.OpPrereqError("There are still %d node(s) in"
1341
                                 " this cluster." % (len(nodelist) - 1),
1342
                                 errors.ECODE_INVAL)
1343
    instancelist = self.cfg.GetInstanceList()
1344
    if instancelist:
1345
      raise errors.OpPrereqError("There are still %d instance(s) in"
1346
                                 " this cluster." % len(instancelist),
1347
                                 errors.ECODE_INVAL)
1348

    
1349
  def Exec(self, feedback_fn):
1350
    """Destroys the cluster.
1351

1352
    """
1353
    master = self.cfg.GetMasterNode()
1354

    
1355
    # Run post hooks on master node before it's removed
1356
    _RunPostHook(self, master)
1357

    
1358
    result = self.rpc.call_node_stop_master(master, False)
1359
    result.Raise("Could not disable the master role")
1360

    
1361
    return master
1362

    
1363

    
1364
def _VerifyCertificate(filename):
1365
  """Verifies a certificate for L{LUClusterVerifyConfig}.
1366

1367
  @type filename: string
1368
  @param filename: Path to PEM file
1369

1370
  """
1371
  try:
1372
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1373
                                           utils.ReadFile(filename))
1374
  except Exception, err: # pylint: disable-msg=W0703
1375
    return (LUClusterVerifyConfig.ETYPE_ERROR,
1376
            "Failed to load X509 certificate %s: %s" % (filename, err))
1377

    
1378
  (errcode, msg) = \
1379
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1380
                                constants.SSL_CERT_EXPIRATION_ERROR)
1381

    
1382
  if msg:
1383
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1384
  else:
1385
    fnamemsg = None
1386

    
1387
  if errcode is None:
1388
    return (None, fnamemsg)
1389
  elif errcode == utils.CERT_WARNING:
1390
    return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1391
  elif errcode == utils.CERT_ERROR:
1392
    return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1393

    
1394
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1395

    
1396

    
1397
def _GetAllHypervisorParameters(cluster, instances):
1398
  """Compute the set of all hypervisor parameters.
1399

1400
  @type cluster: L{objects.Cluster}
1401
  @param cluster: the cluster object
1402
  @param instances: list of L{objects.Instance}
1403
  @param instances: additional instances from which to obtain parameters
1404
  @rtype: list of (origin, hypervisor, parameters)
1405
  @return: a list with all parameters found, indicating the hypervisor they
1406
       apply to, and the origin (can be "cluster", "os X", or "instance Y")
1407

1408
  """
1409
  hvp_data = []
1410

    
1411
  for hv_name in cluster.enabled_hypervisors:
1412
    hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1413

    
1414
  for os_name, os_hvp in cluster.os_hvp.items():
1415
    for hv_name, hv_params in os_hvp.items():
1416
      if hv_params:
1417
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1418
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
1419

    
1420
  # TODO: collapse identical parameter values in a single one
1421
  for instance in instances:
1422
    if instance.hvparams:
1423
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1424
                       cluster.FillHV(instance)))
1425

    
1426
  return hvp_data
1427

    
1428

    
1429
class _VerifyErrors(object):
1430
  """Mix-in for cluster/group verify LUs.
1431

1432
  It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1433
  self.op and self._feedback_fn to be available.)
1434

1435
  """
1436
  TCLUSTER = "cluster"
1437
  TNODE = "node"
1438
  TINSTANCE = "instance"
1439

    
1440
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1441
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1442
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1443
  ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1444
  ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1445
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1446
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1447
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1448
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1449
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1450
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1451
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1452
  ENODEDRBD = (TNODE, "ENODEDRBD")
1453
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1454
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1455
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1456
  ENODEHV = (TNODE, "ENODEHV")
1457
  ENODELVM = (TNODE, "ENODELVM")
1458
  ENODEN1 = (TNODE, "ENODEN1")
1459
  ENODENET = (TNODE, "ENODENET")
1460
  ENODEOS = (TNODE, "ENODEOS")
1461
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1462
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1463
  ENODERPC = (TNODE, "ENODERPC")
1464
  ENODESSH = (TNODE, "ENODESSH")
1465
  ENODEVERSION = (TNODE, "ENODEVERSION")
1466
  ENODESETUP = (TNODE, "ENODESETUP")
1467
  ENODETIME = (TNODE, "ENODETIME")
1468
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1469

    
1470
  ETYPE_FIELD = "code"
1471
  ETYPE_ERROR = "ERROR"
1472
  ETYPE_WARNING = "WARNING"
1473

    
1474
  def _Error(self, ecode, item, msg, *args, **kwargs):
1475
    """Format an error message.
1476

1477
    Based on the opcode's error_codes parameter, either format a
1478
    parseable error code, or a simpler error string.
1479

1480
    This must be called only from Exec and functions called from Exec.
1481

1482
    """
1483
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1484
    itype, etxt = ecode
1485
    # first complete the msg
1486
    if args:
1487
      msg = msg % args
1488
    # then format the whole message
1489
    if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1490
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1491
    else:
1492
      if item:
1493
        item = " " + item
1494
      else:
1495
        item = ""
1496
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1497
    # and finally report it via the feedback_fn
1498
    self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1499

    
1500
  def _ErrorIf(self, cond, *args, **kwargs):
1501
    """Log an error message if the passed condition is True.
1502

1503
    """
1504
    cond = (bool(cond)
1505
            or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1506
    if cond:
1507
      self._Error(*args, **kwargs)
1508
    # do not mark the operation as failed for WARN-only cases
1509
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1510
      self.bad = self.bad or cond
1511
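# Illustrative example, not part of the original source: verification code
# reports problems through _ErrorIf, which marks the operation as failed only
# for ETYPE_ERROR entries ("msg" below is hypothetical):
#
#   self._ErrorIf(bool(msg), self.ENODEHV, node,
#                 "hypervisor verify failure: '%s'", msg)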

    
1512

    
1513
class LUClusterVerify(NoHooksLU):
1514
  """Submits all jobs necessary to verify the cluster.
1515

1516
  """
1517
  REQ_BGL = False
1518

    
1519
  def ExpandNames(self):
1520
    self.needed_locks = {}
1521

    
1522
  def Exec(self, feedback_fn):
1523
    jobs = []
1524

    
1525
    if self.op.group_name:
1526
      groups = [self.op.group_name]
1527
      depends_fn = lambda: None
1528
    else:
1529
      groups = self.cfg.GetNodeGroupList()
1530

    
1531
      # Verify global configuration
1532
      jobs.append([opcodes.OpClusterVerifyConfig()])
1533

    
1534
      # Always depend on global verification
1535
      depends_fn = lambda: [(-len(jobs), [])]

    jobs.extend([opcodes.OpClusterVerifyGroup(group_name=group,
                                              depends=depends_fn())]
                for group in groups)

    # Fix up all parameters
    for op in itertools.chain(*jobs): # pylint: disable-msg=W0142
      op.debug_simulate_errors = self.op.debug_simulate_errors
      op.verbose = self.op.verbose
      op.error_codes = self.op.error_codes
      try:
        op.skip_checks = self.op.skip_checks
      except AttributeError:
        assert not isinstance(op, opcodes.OpClusterVerifyGroup)

    return ResultWithJobs(jobs)


class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
  """Verifies the cluster config.

  """
  REQ_BGL = True

  def _VerifyHVP(self, hvp_data):
    """Verifies locally the syntax of the hypervisor parameters.

    """
    for item, hv_name, hv_params in hvp_data:
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
             (item, hv_name))
      try:
        hv_class = hypervisor.GetHypervisor(hv_name)
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
        hv_class.CheckParameterSyntax(hv_params)
      except errors.GenericError, err:
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))

  def ExpandNames(self):
    # Information can be safely retrieved as the BGL is acquired in exclusive
    # mode
    self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    self._feedback_fn = feedback_fn

    feedback_fn("* Verifying cluster config")

    for msg in self.cfg.VerifyConfig():
      self._ErrorIf(True, self.ECLUSTERCFG, None, msg)

    feedback_fn("* Verifying cluster certificate files")

    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    feedback_fn("* Verifying hypervisor parameters")

    self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
                                                self.all_inst_info.values()))

    feedback_fn("* Verifying all nodes belong to an existing group")

    # We do this verification here because, should this bogus circumstance
    # occur, it would never be caught by VerifyGroup, which only acts on
    # nodes/instances reachable from existing node groups.

    dangling_nodes = set(node.name for node in self.all_node_info.values()
                         if node.group not in self.all_group_info)

    dangling_instances = {}
    no_node_instances = []

    for inst in self.all_inst_info.values():
      if inst.primary_node in dangling_nodes:
        dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
      elif inst.primary_node not in self.all_node_info:
        no_node_instances.append(inst.name)

    pretty_dangling = [
        "%s (%s)" %
        (node.name,
         utils.CommaJoin(dangling_instances.get(node.name,
                                                ["no instances"])))
        for node in dangling_nodes]

    self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
                  "the following nodes (and their instances) belong to a non"
                  " existing group: %s", utils.CommaJoin(pretty_dangling))

    self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
                  "the following instances have a non-existing primary-node:"
                  " %s", utils.CommaJoin(no_node_instances))

    return (not self.bad, [g.name for g in self.all_group_info.values()])


class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
  """Verifies the status of a node group.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  _HOOKS_INDENT_RE = re.compile("^", re.M)
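  # matches the start of every line (re.M); intended for re-indenting
  # multi-line hook output when it is reported back (assumption: the
  # consumer of this regexp is outside this excerpt)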

  class NodeImage(object):
    """A class representing the logical and physical status of a node.

    @type name: string
    @ivar name: the node name to which this object refers
    @ivar volumes: a structure as returned from
        L{ganeti.backend.GetVolumeList} (runtime)
    @ivar instances: a list of running instances (runtime)
    @ivar pinst: list of configured primary instances (config)
    @ivar sinst: list of configured secondary instances (config)
    @ivar sbp: dictionary of {primary-node: list of instances} for all
        instances for which this node is secondary (config)
    @ivar mfree: free memory, as reported by hypervisor (runtime)
    @ivar dfree: free disk, as reported by the node (runtime)
    @ivar offline: the offline status (config)
    @type rpc_fail: boolean
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
        not whether the individual keys were correct) (runtime)
    @type lvm_fail: boolean
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
    @type hyp_fail: boolean
    @ivar hyp_fail: whether the RPC call didn't return the instance list
    @type ghost: boolean
    @ivar ghost: whether this is a known node or not (config)
    @type os_fail: boolean
    @ivar os_fail: whether the RPC call didn't return valid OS data
    @type oslist: list
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
    @type vm_capable: boolean
    @ivar vm_capable: whether the node can host instances

    """
    def __init__(self, offline=False, name=None, vm_capable=True):
      self.name = name
      self.volumes = {}
      self.instances = []
      self.pinst = []
      self.sinst = []
      self.sbp = {}
      self.mfree = 0
      self.dfree = 0
      self.offline = offline
      self.vm_capable = vm_capable
      self.rpc_fail = False
      self.lvm_fail = False
      self.hyp_fail = False
      self.ghost = False
      self.os_fail = False
      self.oslist = {}

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    # Get instances in node group; this is unsafe and needs verification later
    inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_INSTANCE: inst_names,
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      locking.LEVEL_NODE: [],
      }

    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      # Get members of node group; this is unsafe and needs verification later
      nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)

      all_inst_info = self.cfg.GetAllInstancesInfo()

      # In Exec(), we warn about mirrored instances that have primary and
      # secondary living in separate node groups. To fully verify that
      # volumes for these instances are healthy, we will need to do an
      # extra call to their secondaries. We ensure here those nodes will
      # be locked.
      for inst in self.owned_locks(locking.LEVEL_INSTANCE):
        # Important: access only the instances whose lock is owned
        if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
          nodes.update(all_inst_info[inst].secondary_nodes)

      self.needed_locks[locking.LEVEL_NODE] = nodes

  def CheckPrereq(self):
    group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
    group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)

    unlocked_nodes = \
        group_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    unlocked_instances = \
        group_instances.difference(self.owned_locks(locking.LEVEL_INSTANCE))

    if unlocked_nodes:
      raise errors.OpPrereqError("Missing lock for nodes: %s" %
                                 utils.CommaJoin(unlocked_nodes))

    if unlocked_instances:
      raise errors.OpPrereqError("Missing lock for instances: %s" %
                                 utils.CommaJoin(unlocked_instances))

    self.all_node_info = self.cfg.GetAllNodesInfo()
    self.all_inst_info = self.cfg.GetAllInstancesInfo()

    self.my_node_names = utils.NiceSort(group_nodes)
    self.my_inst_names = utils.NiceSort(group_instances)

    self.my_node_info = dict((name, self.all_node_info[name])
                             for name in self.my_node_names)

    self.my_inst_info = dict((name, self.all_inst_info[name])
                             for name in self.my_inst_names)

    # We detect here the nodes that will need the extra RPC calls for verifying
    # split LV volumes; they should be locked.
    extra_lv_nodes = set()

    for inst in self.my_inst_info.values():
      if inst.disk_template in constants.DTS_INT_MIRROR:
        group = self.my_node_info[inst.primary_node].group
        for nname in inst.secondary_nodes:
          if self.all_node_info[nname].group != group:
            extra_lv_nodes.add(nname)

    unlocked_lv_nodes = \
        extra_lv_nodes.difference(self.owned_locks(locking.LEVEL_NODE))

    if unlocked_lv_nodes:
      raise errors.OpPrereqError("these nodes could be locked: %s" %
                                 utils.CommaJoin(unlocked_lv_nodes))
    self.extra_lv_nodes = list(extra_lv_nodes)

  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
             "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True

  def _VerifyNodeTime(self, ninfo, nresult,
                      nvinfo_starttime, nvinfo_endtime):
    """Check the node time.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nvinfo_starttime: the start time of the RPC call
    @param nvinfo_endtime: the end time of the RPC call

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    ntime = nresult.get(constants.NV_TIME, None)
    try:
      ntime_merged = utils.MergeTime(ntime)
    except (ValueError, TypeError):
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
      return

    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
    else:
      ntime_diff = None

    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
             "Node time diverges by at least %s from master node time",
             ntime_diff)

  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
    """Check the node LVM results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param vg_name: the configured VG name

    """
    if vg_name is None:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # checks vg existence and size > 20G
    vglist = nresult.get(constants.NV_VGLIST, None)
    test = not vglist
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
    if not test:
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                            constants.MIN_VG_SIZE)
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # check pv names
    pvlist = nresult.get(constants.NV_PVLIST, None)
    test = pvlist is None
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
    if not test:
      # check that ':' is not present in PV names, since it's a
      # special character for lvcreate (denotes the range of PEs to
      # use on the PV)
      for _, pvname, owner_vg in pvlist:
        test = ":" in pvname
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyNodeBridges(self, ninfo, nresult, bridges):
    """Check the node bridges.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param bridges: the expected list of bridges

    """
    if not bridges:
      return

    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    missing = nresult.get(constants.NV_BRIDGES, None)
    test = not isinstance(missing, list)
    _ErrorIf(test, self.ENODENET, node,
             "did not return valid bridge information")
    if not test:
      _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
               utils.CommaJoin(sorted(missing)))

  def _VerifyNodeNetwork(self, ninfo, nresult):
    """Check the node network connectivity results.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    test = constants.NV_NODELIST not in nresult
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if nresult[constants.NV_NODELIST]:
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if nresult[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, nresult[constants.NV_NODENETTEST][anode])

    test = constants.NV_MASTERIP not in nresult
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node master IP reachability data")
    if not test:
      if not nresult[constants.NV_MASTERIP]:
        if node == self.master_node:
          msg = "the master node cannot reach the master IP (not configured?)"
        else:
          msg = "cannot reach the master IP"
        _ErrorIf(True, self.ENODENET, node, msg)

  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)

  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    @type reserved: L{ganeti.utils.FieldSet}
    @param reserved: a FieldSet of reserved volume names

    """
    for node, n_img in node_image.items():
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # skip non-healthy nodes
        continue
      for volume in n_img.volumes:
        test = ((node not in node_vol_should or
                volume not in node_vol_should[node]) and
                not reserved.Matches(volume))
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the
    instances it was primary for.

    """
    cluster_info = self.cfg.GetClusterInfo()
    for node, n_img in node_image.items():
      # This code checks that every node which is now listed as
      # secondary has enough memory to host all instances it is
      # supposed to should a single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well
      # as up ones, considering that even if they're down someone
      # might want to start them even in the event of a node failure.
      if n_img.offline:
        # we're skipping offline nodes from the N+1 warning, since
        # most likely we don't have good memory information from them;
        # we already list instances living on such nodes, and that's
        # enough warning
        continue
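      # n_img.sbp maps each primary node to the instances that have it as
      # primary and this node as secondary; check that this node could
      # absorb each primary's auto-balanced instances if that node failed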
      for prinode, instances in n_img.sbp.items():
        needed_mem = 0
        for instance in instances:
          bep = cluster_info.FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = n_img.mfree < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate instance failovers"
                      " should node %s fail (%dMiB needed, %dMiB available)",
                      prinode, needed_mem, n_img.mfree)

  @classmethod
  def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
                   (files_all, files_all_opt, files_mc, files_vm)):
    """Verifies file checksums collected from all nodes.

    @param errorif: Callback for reporting errors
    @param nodeinfo: List of L{objects.Node} objects
    @param master_node: Name of master node
    @param all_nvinfo: RPC results

    """
    node_names = frozenset(node.name for node in nodeinfo if not node.offline)

    assert master_node in node_names
    assert (len(files_all | files_all_opt | files_mc | files_vm) ==
            sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
           "Found file listed in more than one file list"

    # Define functions determining which nodes to consider for a file
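    # (a value of None means the file is checked on every non-offline node;
    # otherwise the callback selects the nodes expected to have the file)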
    file2nodefn = dict([(filename, fn)
      for (files, fn) in [(files_all, None),
                          (files_all_opt, None),
                          (files_mc, lambda node: (node.master_candidate or
                                                   node.name == master_node)),
                          (files_vm, lambda node: node.vm_capable)]
      for filename in files])

    fileinfo = dict((filename, {}) for filename in file2nodefn.keys())

    for node in nodeinfo:
      if node.offline:
        continue

      nresult = all_nvinfo[node.name]

      if nresult.fail_msg or not nresult.payload:
        node_files = None
      else:
        node_files = nresult.payload.get(constants.NV_FILELIST, None)

      test = not (node_files and isinstance(node_files, dict))
      errorif(test, cls.ENODEFILECHECK, node.name,
              "Node did not return file checksum data")
      if test:
        continue

      for (filename, checksum) in node_files.items():
        # Check if the file should be considered for a node
        fn = file2nodefn[filename]
        if fn is None or fn(node):
          fileinfo[filename].setdefault(checksum, set()).add(node.name)

    for (filename, checksums) in fileinfo.items():
      assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"

      # Nodes having the file
      with_file = frozenset(node_name
                            for nodes in fileinfo[filename].values()
                            for node_name in nodes)

      # Nodes missing file
      missing_file = node_names - with_file

      if filename in files_all_opt:
        # All or no nodes
        errorif(missing_file and missing_file != node_names,
                cls.ECLUSTERFILECHECK, None,
                "File %s is optional, but it must exist on all or no"
                " nodes (not found on %s)",
                filename, utils.CommaJoin(utils.NiceSort(missing_file)))
      else:
        errorif(missing_file, cls.ECLUSTERFILECHECK, None,
                "File %s is missing from node(s) %s", filename,
                utils.CommaJoin(utils.NiceSort(missing_file)))

      # See if there are multiple versions of the file
      test = len(checksums) > 1
      if test:
        variants = ["variant %s on %s" %
                    (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
                    for (idx, (checksum, nodes)) in
                      enumerate(sorted(checksums.items()))]
      else:
        variants = []

      errorif(test, cls.ECLUSTERFILECHECK, None,
              "File %s found with %s different checksums (%s)",
              filename, len(checksums), "; ".join(variants))

  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
                      drbd_map):
    """Verifies the node DRBD status.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param instanceinfo: the dict of instances
    @param drbd_helper: the configured DRBD usermode helper
    @param drbd_map: the DRBD map as returned by
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    if drbd_helper:
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
      test = (helper_result is None)
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
               "no drbd usermode helper returned")
      if helper_result:
        status, payload = helper_result
        test = not status
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "drbd usermode helper check unsuccessful: %s", payload)
        test = status and (payload != drbd_helper)
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
                 "wrong drbd usermode helper: %s", payload)

    # compute the DRBD minors
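    # node_drbd maps each minor expected on this node to a tuple of
    # (instance name, whether the minor should currently be in use)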
    node_drbd = {}
    for minor, instance in drbd_map[node].items():
      test = instance not in instanceinfo
      _ErrorIf(test, self.ECLUSTERCFG, None,
               "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
      if test:
        node_drbd[minor] = (instance, False)
      else:
        instance = instanceinfo[instance]
        node_drbd[minor] = (instance.name, instance.admin_up)

    # and now check them
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
    test = not isinstance(used_minors, (tuple, list))
    _ErrorIf(test, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
    if test:
      # we cannot check drbd status
      return

    for minor, (iname, must_exist) in node_drbd.items():
      test = minor not in used_minors and must_exist
      _ErrorIf(test, self.ENODEDRBD, node,
               "drbd minor %d of instance %s is not active", minor, iname)
    for minor in used_minors:
      test = minor not in node_drbd
      _ErrorIf(test, self.ENODEDRBD, node,
               "unallocated drbd minor %d is in use", minor)

  def _UpdateNodeOS(self, ninfo, nresult, nimg):
    """Builds the node OS structures.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    remote_os = nresult.get(constants.NV_OSLIST, None)
    test = (not isinstance(remote_os, list) or
            not compat.all(isinstance(v, list) and len(v) == 7
                           for v in remote_os))

    _ErrorIf(test, self.ENODEOS, node,
             "node hasn't returned valid OS data")

    nimg.os_fail = test

    if test:
      return

    os_dict = {}

    for (name, os_path, status, diagnose,
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:

      if name not in os_dict:
        os_dict[name] = []

      # parameters is a list of lists instead of list of tuples due to
      # JSON lacking a real tuple type, fix it:
      parameters = [tuple(v) for v in parameters]
      os_dict[name].append((os_path, status, diagnose,
                            set(variants), set(parameters), set(api_ver)))

    nimg.oslist = os_dict

  def _VerifyNodeOS(self, ninfo, nimg, base):
    """Verifies the node OS list.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nimg: the node image object
    @param base: the 'template' node we match against (e.g. from the master)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"

    beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
    for os_name, os_data in nimg.oslist.items():
      assert os_data, "Empty OS status for OS %s?!" % os_name
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
      _ErrorIf(not f_status, self.ENODEOS, node,
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
               "OS '%s' has multiple entries (first one shadows the rest): %s",
               os_name, utils.CommaJoin([v[0] for v in os_data]))
      # comparisons with the 'base' image
      test = os_name not in base.oslist
      _ErrorIf(test, self.ENODEOS, node,
               "Extra OS %s not present on reference node (%s)",
               os_name, base.name)
      if test:
        continue
      assert base.oslist[os_name], "Base node has empty OS status?"
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
      if not b_status:
        # base OS is invalid, skipping
        continue
      for kind, a, b in [("API version", f_api, b_api),
                         ("variants list", f_var, b_var),
                         ("parameters", beautify_params(f_param),
                          beautify_params(b_param))]:
        _ErrorIf(a != b, self.ENODEOS, node,
                 "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
                 kind, os_name, base.name,
                 utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))

    # check any missing OSes
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
    _ErrorIf(missing, self.ENODEOS, node,
             "OSes present on reference node %s but missing on this node: %s",
             base.name, utils.CommaJoin(missing))

  def _VerifyOob(self, ninfo, nresult):
    """Verifies out of band functionality of a node.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node

    """
    node = ninfo.name
    # We just have to verify the paths on master and/or master candidates
    # as the oob helper is invoked on the master
    if ((ninfo.master_candidate or ninfo.master_capable) and
        constants.NV_OOB_PATHS in nresult):
      for path_result in nresult[constants.NV_OOB_PATHS]:
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)

  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
    """Verifies and updates the node volume data.

    This function will update a L{NodeImage}'s internal structures
    with data from the remote call.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    nimg.lvm_fail = True
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
    if vg_name is None:
      pass
    elif isinstance(lvdata, basestring):
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
               utils.SafeEncode(lvdata))
    elif not isinstance(lvdata, dict):
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
    else:
      nimg.volumes = lvdata
      nimg.lvm_fail = False

  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
    """Verifies and updates the node instance list.

    If the listing was successful, then updates this node's instance
    list. Otherwise, it marks the RPC call as failed for the instance
    list key.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object

    """
    idata = nresult.get(constants.NV_INSTANCELIST, None)
    test = not isinstance(idata, list)
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
    if test:
      nimg.hyp_fail = True
    else:
      nimg.instances = idata

  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
    """Verifies and computes a node information map.

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the remote results for the node
    @param nimg: the node image object
    @param vg_name: the configured VG name

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # try to read free memory (from the hypervisor)
    hv_info = nresult.get(constants.NV_HVINFO, None)
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
    if not test:
      try:
        nimg.mfree = int(hv_info["memory_free"])
      except (ValueError, TypeError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check hypervisor")

    # FIXME: devise a free space model for file based instances as well
    if vg_name is not None:
      test = (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST])
      _ErrorIf(test, self.ENODELVM, node,
               "node didn't return data for the volume group '%s'"
               " - it is either missing or broken", vg_name)
      if not test:
        try:
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
        except (ValueError, TypeError):
          _ErrorIf(True, self.ENODERPC, node,
                   "node returned invalid LVM info, check LVM status")

  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{objects.Node})
    @param node_image: Node objects
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    node_disks = {}
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))
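
      # zip() pairs each submitted (instance, disk) entry with the status
      # returned for that disk, relying on the node preserving the order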
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run in the post phase only; if they fail, their
    output is logged in the verify output and the verification fails.

    """
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }

    env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
               for node in self.my_node_info.values())

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([], self.my_node_names)

  def Exec(self, feedback_fn):
    """Verify integrity of the node group, performing various tests on nodes.

    """
    # This method has too many local variables. pylint: disable-msg=R0914

    if not self.my_node_names:
      # empty node group
      feedback_fn("* Empty node group, skipping verification")
      return True

    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    cluster = self.cfg.GetClusterInfo()
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
    hypervisors = cluster.enabled_hypervisors
    node_data_list = [self.my_node_info[name] for name in self.my_node_names]

    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    node_vol_should = {}

    # FIXME: verify OS list

    # File verification
    filemap = _ComputeAncillaryFiles(cluster, False)

    # do local checksums
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))

    # We will make nodes contact all nodes in their group, and one node from
    # every other group.
    # TODO: should it be a *random* node, different every time?
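    # (contacting a single node per foreign group is enough to prove
    # inter-group connectivity without a full N-to-N test)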
    online_nodes = [node.name for node in node_data_list if not node.offline]
    other_group_nodes = {}

    for name in sorted(self.all_node_info):
      node = self.all_node_info[name]
      if (node.group not in other_group_nodes
          and node.group != self.group_uuid
          and not node.offline):
        other_group_nodes[node.group] = node.name

    node_verify_param = {
      constants.NV_FILELIST:
        utils.UniqueSequence(filename
                             for files in filemap
                             for filename in files),
      constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_HVPARAMS:
        _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
      constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
                                 for node in node_data_list
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # bridge checks
    # FIXME: this needs to be changed per node-group, not cluster-wide
    bridges = set()
    default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
    if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.add(default_nicpp[constants.NIC_LINK])
    for instance in self.my_inst_info.values():
      for nic in instance.nics:
        full_nic = cluster.SimpleFillNIC(nic.nicparams)
        if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          bridges.add(full_nic[constants.NIC_LINK])

    if bridges:
      node_verify_param[constants.NV_BRIDGES] = list(bridges)

    # Build our expected cluster state
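    # (one NodeImage per node in this group; images for nodes outside the
    # group that appear in our instances' node lists are added further below)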
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in node_data_list)

    # Gather OOB paths
    oob_paths = []
    for node in self.all_node_info.values():
      path = _SupportsOob(self.cfg, node)
      if path and path not in oob_paths:
        oob_paths.append(path)

    if oob_paths:
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths

    for instance in self.my_inst_names:
      inst_config = self.my_inst_info[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          gnode = self.NodeImage(name=nname)
          gnode.ghost = (nname not in self.all_node_info)
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
                                           node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    if self.extra_lv_nodes and vg_name is not None:
      extra_lv_nvinfo = \
          self.rpc.call_node_verify(self.extra_lv_nodes,
                                    {constants.NV_LVLIST: vg_name},
                                    self.cfg.GetClusterName())
    else:
      extra_lv_nvinfo = {}

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" %
                len(self.my_node_names))
    instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
                                     self.my_inst_info)

    feedback_fn("* Verifying configuration file consistency")

    # If not all nodes are being checked, we need to make sure the master node
    # and a non-checked vm_capable node are in the list.
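    # (otherwise _VerifyFiles would have no authoritative copy of the
    # ancillary files to compare this group's checksums against)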
    absent_nodes = set(self.all_node_info).difference(self.my_node_info)
    if absent_nodes:
      vf_nvinfo = all_nvinfo.copy()
      vf_node_info = list(self.my_node_info.values())
      additional_nodes = []
      if master_node not in self.my_node_info:
        additional_nodes.append(master_node)
        vf_node_info.append(self.all_node_info[master_node])
      # Add the first vm_capable node we find which is not included
      for node in absent_nodes:
        nodeinfo = self.all_node_info[node]
        if nodeinfo.vm_capable and not nodeinfo.offline:
          additional_nodes.append(node)
          vf_node_info.append(self.all_node_info[node])
          break
      key = constants.NV_FILELIST
      vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
                                                 {key: node_verify_param[key]},
                                                 self.cfg.GetClusterName()))
    else:
      vf_nvinfo = all_nvinfo
      vf_node_info = self.my_node_info.values()

    self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)

    feedback_fn("* Verifying node status")

    refos_img = None

    for node_i in node_data_list:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyOob(node_i, nresult)

      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)

        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)
        self._VerifyNodeBridges(node_i, nresult, bridges)

        # Check whether all running instances are primary for the node. (This
        # can no longer be done from _VerifyInstance below, since some of the
        # wrong instances could be from other node groups.)
        non_primary_inst = set(nimg.instances).difference(nimg.pinst)

        for inst in non_primary_inst:
          test = inst in self.all_inst_info
          _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
                   "instance should not run on node %s", node_i.name)
          _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
                   "node is running unknown instance %s", inst)

    for node, result in extra_lv_nvinfo.items():
      self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
                              node_image[node], vg_name)

    feedback_fn("* Verifying instance status")
    for instance in self.my_inst_names:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = self.my_inst_info[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      _ErrorIf(inst_config.admin_up and pnode_img.offline,
               self.EINSTANCEBADNODE, instance,
               "instance is marked as running and lives on offline node %s",
               inst_config.primary_node)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)

      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if inst_config.disk_template in constants.DTS_INT_MIRROR:
        pnode = inst_config.primary_node
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
        instance_groups = {}

        for node in instance_nodes:
          instance_groups.setdefault(self.all_node_info[node].group,
                                     []).append(node)

        pretty_list = [
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
          # Sort so that we always list the primary node first.
          for group, nodes in sorted(instance_groups.items(),
                                     key=lambda (_, nodes): pnode in nodes,
                                     reverse=True)]

        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
                      instance, "instance has primary and secondary nodes in"
                      " different groups: %s", utils.CommaJoin(pretty_list),
                      code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance has offline secondary node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)

    # We will get spurious "unknown volume" warnings if any node of this group
    # is secondary for an instance whose primary is in another group. To avoid
    # them, we find these instances and add their volumes to node_vol_should.
    for inst in self.all_inst_info.values():
      for secondary in inst.secondary_nodes:
        if (secondary in self.my_node_info
            and inst.name not in self.my_inst_info):
          inst.MapLVsByNode(node_vol_should)
          break

    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, self.my_inst_info)

    feedback_fn("* Other Notes")
    if i_non_redundant:
2924
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2925
                  % len(i_non_redundant))
2926

    
2927
    if i_non_a_balanced:
2928
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2929
                  % len(i_non_a_balanced))
2930

    
2931
    if n_offline:
2932
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2933

    
2934
    if n_drained:
2935
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2936

    
2937
    return not self.bad
2938

    
2939
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, only for non-empty groups,
    # and are only interested in their results
    if not self.my_node_names:
      # empty node group
      pass
    elif phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = self._HOOKS_INDENT_RE.sub("      ", output)
            feedback_fn("%s" % output)
            lu_result = 0

    return lu_result


class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    group_names = self.owned_locks(locking.LEVEL_NODEGROUP)

    # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
    return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
                           for group in group_names])


class LUGroupVerifyDisks(NoHooksLU):
  """Verifies the status of all disks in a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    # Raises errors.OpPrereqError on its own if group can't be found
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      self.needed_locks[locking.LEVEL_NODEGROUP] = \
        set([self.group_uuid] +
            # Lock all groups used by instances optimistically; this requires
            # going via the node before it's locked, requiring verification
            # later on
            [group_uuid
             for instance_name in self.owned_locks(locking.LEVEL_INSTANCE)
             for group_uuid in self.cfg.GetInstanceNodeGroups(instance_name)])

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be verified which contain
      # actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be verified
      assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
      member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for (instance_name, inst) in self.instances.items():
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = set()
    res_missing = {}

    nv_dict = _MapInstanceDisksToNodes([inst
                                        for inst in self.instances.values()
                                        if inst.admin_up])

    if nv_dict:
      nodes = utils.NiceSort(set(self.owned_locks(locking.LEVEL_NODE)) &
                             set(self.cfg.GetVmCapableNodeList()))

      node_lvs = self.rpc.call_lv_list(nodes, [])

      for (node, node_res) in node_lvs.items():
        if node_res.offline:
          continue

        msg = node_res.fail_msg
        if msg:
          logging.warning("Error enumerating LVs on node %s: %s", node, msg)
          res_nodes[node] = msg
          continue

        for lv_name, (_, _, lv_online) in node_res.payload.items():
          inst = nv_dict.pop((node, lv_name), None)
          if not (lv_online or inst is None):
            res_instances.add(inst)

      # any leftover items in nv_dict are missing LVs, let's arrange the data
      # better
      for key, inst in nv_dict.iteritems():
        res_missing.setdefault(inst, []).append(key)

    return (res_nodes, list(res_instances), res_missing)


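# Editorial note (illustrative sketch, not part of the original code; all node,
# instance and volume names below are hypothetical): a typical return value of
# LUGroupVerifyDisks.Exec could look like
#   ({"node3.example.com": "Connection failed"},   # nodes that could not be queried
#    ["instance1.example.com"],                    # instances needing activate-disks
#    {"instance2.example.com": [("node2.example.com", "xenvg/disk0")]})
# i.e. per-node RPC errors, instances whose LVs were found offline, and
# instances whose LVs were not found at all.
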
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.instances:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = _ShareAll()

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)

    self.wanted_instances = \
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

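  # Editorial note (illustrative sketch, not part of the original code): for a
  # DRBD8 disk recorded as 2048 MiB whose data child is recorded as only
  # 1024 MiB, _EnsureChildSizes bumps the child's recorded size to 2048 and
  # returns True, telling the caller the configuration needs to be written
  # back.
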
  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsize(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dskl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed


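# Editorial note (illustrative sketch, not part of the original code): the
# size reported back by blockdev_getsize is in bytes, which is why Exec above
# shifts by 20 bits (size >> 20) to compare against the configuration's MiB
# value.  The returned "changed" list has entries of the form
#   ("instance1.example.com", 0, 10240)
# meaning disk 0 of that (hypothetical) instance was corrected to 10240 MiB.
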
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if new_ip != old_ip:
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network" %
                                   new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetOnlineNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return clustername


class LUClusterSetParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if self.op.uid_pool:
      uidpool.CheckUidPool(self.op.uid_pool)

    if self.op.add_uids:
      uidpool.CheckUidPool(self.op.add_uids)

    if self.op.remove_uids:
      uidpool.CheckUidPool(self.op.remove_uids)

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
                                   " instances exist", errors.ECODE_INVAL)

    if self.op.drbd_helper is not None and not self.op.drbd_helper:
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
        raise errors.OpPrereqError("Cannot disable drbd helper while"
                                   " drbd-based instances exist",
                                   errors.ECODE_INVAL)

    node_list = self.owned_locks(locking.LEVEL_NODE)

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus), errors.ECODE_ENVIRON)

    if self.op.drbd_helper:
      # checks given drbd helper on all nodes
      helpers = self.rpc.call_drbd_helper(node_list)
      for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
        if ninfo.offline:
          self.LogInfo("Not checking drbd helper on offline node %s", node)
          continue
        msg = helpers[node].fail_msg
        if msg:
          raise errors.OpPrereqError("Error checking drbd helper on node"
                                     " '%s': %s" % (node, msg),
                                     errors.ECODE_ENVIRON)
        node_helper = helpers[node].payload
        if node_helper != self.op.drbd_helper:
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
                                     (node, node_helper), errors.ECODE_ENVIRON)

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)

      # TODO: we need a more general way to handle resetting
      # cluster-level parameters to default values
      if self.new_ndparams["oob_program"] == "":
        self.new_ndparams["oob_program"] = \
            constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
      nic_errors = []

      # check all instances for consistency
      for instance in self.cfg.GetAllInstancesInfo().values():
        for nic_idx, nic in enumerate(instance.nics):
          params_copy = copy.deepcopy(nic.nicparams)
          params_filled = objects.FillDict(self.new_nicparams, params_copy)

          # check parameter syntax
          try:
            objects.NIC.CheckParameterSyntax(params_filled)
          except errors.ConfigurationError, err:
            nic_errors.append("Instance %s, nic/%d: %s" %
                              (instance.name, nic_idx, err))

          # if we're moving instances to routed, check that they have an ip
          target_mode = params_filled[constants.NIC_MODE]
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
            nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
                              " address" % (instance.name, nic_idx))
      if nic_errors:
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
                                   "\n".join(nic_errors))

    # hypervisor list/parameters
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    # os hypervisor parameters
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
    if self.op.os_hvp:
      for os_name, hvs in self.op.os_hvp.items():
        if os_name not in self.new_os_hvp:
          self.new_os_hvp[os_name] = hvs
        else:
          for hv_name, hv_dict in hvs.items():
            if hv_name not in self.new_os_hvp[os_name]:
              self.new_os_hvp[os_name][hv_name] = hv_dict
            else:
              self.new_os_hvp[os_name][hv_name].update(hv_dict)

    # os parameters
    self.new_osp = objects.FillDict(cluster.osparams, {})
    if self.op.osparams:
      for os_name, osp in self.op.osparams.items():
        if os_name not in self.new_osp:
          self.new_osp[os_name] = {}

        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
                                                  use_none=True)

        if not self.new_osp[os_name]:
          # we removed all parameters
          del self.new_osp[os_name]
        else:
          # check the parameter validity (remote check)
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
                         os_name, self.new_osp[os_name])

    # changes to the hypervisor list
    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      for hv in self.hv_list:
        # if the hypervisor doesn't already exist in the cluster
        # hvparams, we initialize it to empty, and then (in both
        # cases) we make sure to fill the defaults, as we might not
        # have a complete defaults list if the hypervisor wasn't
        # enabled before
        if hv not in new_hvp:
          new_hvp[hv] = {}
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

    if self.op.os_hvp:
      # no need to check any newly-enabled hypervisors, since the
      # defaults have already been checked in the above code-block
      for os_name, os_hvp in self.new_os_hvp.items():
        for hv_name, hv_params in os_hvp.items():
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          # we need to fill in the new os_hvp on top of the actual hv_p
          cluster_defaults = self.new_hvparams.get(hv_name, {})
          new_osp = objects.FillDict(cluster_defaults, hv_params)
          hv_class = hypervisor.GetHypervisor(hv_name)
          hv_class.CheckParameterSyntax(new_osp)
          _CheckHVParams(self, node_list, hv_name, new_osp)

    if self.op.default_iallocator:
      alloc_script = utils.FindFile(self.op.default_iallocator,
                                    constants.IALLOCATOR_SEARCH_PATH,
                                    os.path.isfile)
      if alloc_script is None:
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
                                   " specified" % self.op.default_iallocator,
                                   errors.ECODE_INVAL)

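  # Editorial note (illustrative sketch, not part of the original code): per-OS
  # hypervisor parameters are layered on top of the cluster-wide defaults via
  # objects.FillDict() in CheckPrereq above.  For a hypothetical override such
  # as os_hvp = {"debian-image": {"kvm": {"kernel_path": ""}}} only the named
  # key changes; every other kvm parameter keeps its value from
  # self.new_hvparams["kvm"] in the merged dict that gets validated.
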
  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.drbd_helper is not None:
      new_helper = self.op.drbd_helper
      if not new_helper:
        new_helper = None
      if new_helper != self.cfg.GetDRBDHelper():
        self.cfg.SetDRBDHelper(new_helper)
      else:
        feedback_fn("Cluster DRBD helper already in desired state,"
                    " not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.os_hvp:
      self.cluster.os_hvp = self.new_os_hvp
    if self.op.enabled_hypervisors is not None:
      self.cluster.hvparams = self.new_hvparams
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
    if self.op.osparams:
      self.cluster.osparams = self.new_osp
    if self.op.ndparams:
      self.cluster.ndparams = self.new_ndparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self, [])

    if self.op.maintain_node_health is not None:
      self.cluster.maintain_node_health = self.op.maintain_node_health

    if self.op.prealloc_wipe_disks is not None:
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks

    if self.op.add_uids is not None:
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)

    if self.op.remove_uids is not None:
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)

    if self.op.uid_pool is not None:
      self.cluster.uid_pool = self.op.uid_pool

    if self.op.default_iallocator is not None:
      self.cluster.default_iallocator = self.op.default_iallocator

    if self.op.reserved_lvs is not None:
      self.cluster.reserved_lvs = self.op.reserved_lvs

    def helper_os(aname, mods, desc):
      desc += " OS list"
      lst = getattr(self.cluster, aname)
      for key, val in mods:
        if key == constants.DDM_ADD:
          if val in lst:
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
          else:
            lst.append(val)
        elif key == constants.DDM_REMOVE:
          if val in lst:
            lst.remove(val)
          else:
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
        else:
          raise errors.ProgrammerError("Invalid modification '%s'" % key)

    if self.op.hidden_os:
      helper_os("hidden_os", self.op.hidden_os, "hidden")

    if self.op.blacklisted_os:
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")

    if self.op.master_netdev:
      master = self.cfg.GetMasterNode()
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
                  self.cluster.master_netdev)
      result = self.rpc.call_node_stop_master(master, False)
      result.Raise("Could not disable the master ip")
      feedback_fn("Changing master_netdev from %s to %s" %
                  (self.cluster.master_netdev, self.op.master_netdev))
      self.cluster.master_netdev = self.op.master_netdev

    self.cfg.Update(self.cluster, feedback_fn)

    if self.op.master_netdev:
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
                  self.op.master_netdev)
      result = self.rpc.call_node_start_master(master, False, False)
      if result.fail_msg:
        self.LogWarning("Could not re-enable the master ip on"
                        " the master, please restart manually: %s",
                        result.fail_msg)


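# Editorial note (illustrative sketch, not part of the original code): the
# "mods" argument consumed by helper_os() above is a list of (action, os_name)
# pairs.  A hypothetical opcode field such as
#   hidden_os = [(constants.DDM_ADD, "lenny-image"),
#                (constants.DDM_REMOVE, "etch-image")]
# would add the first OS to, and drop the second from, the hidden OS list.
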
def _UploadHelper(lu, nodes, fname):
  """Helper for uploading a file and showing warnings.

  """
  if os.path.exists(fname):
    result = lu.rpc.call_upload_file(nodes, fname)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        msg = ("Copy of file %s to node %s failed: %s" %
               (fname, to_node, msg))
        lu.proc.LogWarning(msg)


def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed

  """
  # Compute files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()
  if not redist:
    files_mc.add(constants.CLUSTER_CONF_FILE)

  # Files which should only be on VM-capable nodes
  files_vm = set(filename
    for hv_name in cluster.enabled_hypervisors
    for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
         "Found file listed in more than one file list"

  return (files_all, files_all_opt, files_mc, files_vm)


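# Editorial note (illustrative sketch, not part of the original code): the four
# sets returned above partition the ancillary files by audience, roughly
#   files_all     -> every node (e.g. constants.SSH_KNOWN_HOSTS_FILE)
#   files_all_opt -> every node, but only if the file exists at all
#   files_mc      -> master candidates only (the cluster configuration)
#   files_vm      -> VM-capable nodes only (hypervisor-specific files)
# and the assert guarantees no file is claimed by more than one category.
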
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # Gather target nodes
  cluster = lu.cfg.GetClusterInfo()
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())

  online_nodes = lu.cfg.GetOnlineNodeList()
  vm_nodes = lu.cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # Never distribute to master node
  for nodelist in [online_nodes, vm_nodes]:
    if master_info.name in nodelist:
      nodelist.remove(master_info.name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert not (constants.CLUSTER_CONF_FILE in files_all or
              constants.CLUSTER_CONF_FILE in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  filemap = [
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    ]

  # Upload the files
  for (node_list, files) in filemap:
    for fname in files:
      _UploadHelper(lu, node_list, fname)


class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


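# Editorial note (illustrative sketch, not part of the original code):
# _WaitForSync polls the primary node once per loop iteration.  RPC failures
# are retried up to 10 times with a 6 second pause before giving up, a "done
# but degraded" status is re-checked up to 10 more times at 1 second
# intervals, and otherwise the loop sleeps for min(60, estimated_time)
# seconds between progress reports.
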
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


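# Editorial note (illustrative sketch, not part of the original code): callers
# that only care whether the local storage backing a DRBD device is healthy
# pass ldisk=True, e.g. something like
#   _CheckDiskConsistency(self, dev, node, False, ldisk=True)
# which accepts a device that is still resyncing over the network as long as
# its local disk status is constants.LDS_OKAY.
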
class LUOobCommand(NoHooksLU):
  """Logical unit for OOB handling.

  """
  REQ_BGL = False
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)

  def ExpandNames(self):
    """Gather locks we need.

    """
    if self.op.node_names:
      self.op.node_names = _GetWantedNodes(self, self.op.node_names)
      lock_names = self.op.node_names
    else:
      lock_names = locking.ALL_SET

    self.needed_locks = {
      locking.LEVEL_NODE: lock_names,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - OOB is supported

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.nodes = []
    self.master_node = self.cfg.GetMasterNode()

    assert self.op.power_delay >= 0.0

    if self.op.node_names:
      if (self.op.command in self._SKIP_MASTER and
          self.master_node in self.op.node_names):
        master_node_obj = self.cfg.GetNodeInfo(self.master_node)
        master_oob_handler = _SupportsOob(self.cfg, master_node_obj)

        if master_oob_handler:
          additional_text = ("run '%s %s %s' if you want to operate on the"
                             " master regardless") % (master_oob_handler,
                                                      self.op.command,
                                                      self.master_node)
        else:
          additional_text = "it does not support out-of-band operations"

        raise errors.OpPrereqError(("Operating on the master node %s is not"
                                    " allowed for %s; %s") %
                                   (self.master_node, self.op.command,
                                    additional_text), errors.ECODE_INVAL)
    else:
      self.op.node_names = self.cfg.GetNodeList()
      if self.op.command in self._SKIP_MASTER:
        self.op.node_names.remove(self.master_node)

    if self.op.command in self._SKIP_MASTER:
      assert self.master_node not in self.op.node_names

    for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
      if node is None:
        raise errors.OpPrereqError("Node %s not found" % node_name,
                                   errors.ECODE_NOENT)
      else:
        self.nodes.append(node)

      if (not self.op.ignore_status and
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
                                    " not marked offline") % node_name,
                                   errors.ECODE_STATE)

  def Exec(self, feedback_fn):
    """Execute OOB and return result if we expect any.

    """
    master_node = self.master_node
    ret = []

    for idx, node in enumerate(utils.NiceSort(self.nodes,
                                              key=lambda node: node.name)):
      node_entry = [(constants.RS_NORMAL, node.name)]
      ret.append(node_entry)

      oob_program = _SupportsOob(self.cfg, node)

      if not oob_program:
        node_entry.append((constants.RS_UNAVAIL, None))
        continue

      logging.info("Executing out-of-band command '%s' using '%s' on %s",
                   self.op.command, oob_program, node.name)
      result = self.rpc.call_run_oob(master_node, oob_program,
                                     self.op.command, node.name,
                                     self.op.timeout)

      if result.fail_msg:
        self.LogWarning("Out-of-band RPC failed on node '%s': %s",
                        node.name, result.fail_msg)
        node_entry.append((constants.RS_NODATA, None))
      else:
        try:
          self._CheckPayload(result)
        except errors.OpExecError, err:
          self.LogWarning("Payload returned by node '%s' is not valid: %s",
                          node.name, err)
          node_entry.append((constants.RS_NODATA, None))
        else:
          if self.op.command == constants.OOB_HEALTH:
            # For health we should log important events
            for item, status in result.payload:
              if status in [constants.OOB_STATUS_WARNING,
                            constants.OOB_STATUS_CRITICAL]:
                self.LogWarning("Item '%s' on node '%s' has status '%s'",
                                item, node.name, status)

          if self.op.command == constants.OOB_POWER_ON:
            node.powered = True
          elif self.op.command == constants.OOB_POWER_OFF:
            node.powered = False
          elif self.op.command == constants.OOB_POWER_STATUS:
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
            if powered != node.powered:
              logging.warning(("Recorded power state (%s) of node '%s' does not"
                               " match actual power state (%s)"), node.powered,
                              node.name, powered)

          # For configuration changing commands we should update the node
          if self.op.command in (constants.OOB_POWER_ON,
                                 constants.OOB_POWER_OFF):
            self.cfg.Update(node, feedback_fn)

          node_entry.append((constants.RS_NORMAL, result.payload))

          if (self.op.command == constants.OOB_POWER_ON and
              idx < len(self.nodes) - 1):
            time.sleep(self.op.power_delay)

    return ret

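  # Editorial note (illustrative sketch, not part of the original code; node
  # name and payload shape below are hypothetical): each entry appended to
  # "ret" above describes one node as a list of (status, data) tuples, so a
  # successful power-status query might yield something like
  #   [(constants.RS_NORMAL, "node1.example.com"),
  #    (constants.RS_NORMAL, {"powered": True})]
  # while a node without an OOB program gets (constants.RS_UNAVAIL, None).
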
  def _CheckPayload(self, result):
    """Checks if the payload is valid.

    @param result: RPC result
    @raises errors.OpExecError: If payload is not valid

    """
    errs = []
    if self.op.command == constants.OOB_HEALTH:
      if not isinstance(result.payload, list):
        errs.append("command 'health' is expected to return a list but got %s" %
                    type(result.payload))
      else:
        for item, status in result.payload:
          if status not in constants.OOB_STATUSES:
            errs.append("health item '%s' has invalid status '%s'" %
                        (item, status))

    if self.op.command == constants.OOB_POWER_STATUS:
      if not isinstance(result.payload, dict):
        errs.append("power-status is expected to return a dict but got %s" %
                    type(result.payload))

    if self.op.command in [
        constants.OOB_POWER_ON,
        constants.OOB_POWER_OFF,
        constants.OOB_POWER_CYCLE,
        ]:
      if result.payload is not None:
        errs.append("%s is expected to not return payload but got '%s'" %
                    (self.op.command, result.payload))

    if errs:
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
                               utils.CommaJoin(errs))


class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    if self.names:
      self.wanted = self.names
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and tuples of (path, status, diagnose,
        variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "", [], [])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for (name, path, status, diagnose, variants,
           params, api_versions) in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        # convert params from [name, help] to (name, help)
        params = [tuple(v) for v in params]
        all_os[name][node_name].append((path, status, diagnose,
                                        variants, params, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = [node.name
                   for node in lu.cfg.GetAllNodesInfo().values()
                   if not node.offline and node.vm_capable]
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      for idx, osl in enumerate(os_data.values()):
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if idx == 0:
          # First entry
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
            if name in data]


class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
                     for fname in ["hidden", "blacklisted"]
                     if fname not in fields]
    if "valid" not in fields:
      status_filter.append([qlang.OP_TRUE, "valid"])

    if status_filter:
      status_filter.insert(0, qlang.OP_AND)
    else:
      status_filter = None

    if name_filter and status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    elif name_filter:
      return name_filter
    else:
      return status_filter

  def CheckArguments(self):
4234
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4235
                       self.op.output_fields, False)
4236

    
4237
  def ExpandNames(self):
4238
    self.oq.ExpandNames(self)
4239

    
4240
  def Exec(self, feedback_fn):
4241
    return self.oq.OldStyleQuery(self)
4242

    
4243

    
4244
class LUNodeRemove(LogicalUnit):
4245
  """Logical unit for removing a node.
4246

4247
  """
4248
  HPATH = "node-remove"
4249
  HTYPE = constants.HTYPE_NODE
4250

    
4251
  def BuildHooksEnv(self):
4252
    """Build hooks env.
4253

4254
    This doesn't run on the target node in the pre phase as a failed
4255
    node would then be impossible to remove.
4256

4257
    """
4258
    return {
4259
      "OP_TARGET": self.op.node_name,
4260
      "NODE_NAME": self.op.node_name,
4261
      }
4262

    
4263
  def BuildHooksNodes(self):
4264
    """Build hooks nodes.
4265

4266
    """
4267
    all_nodes = self.cfg.GetNodeList()
4268
    try:
4269
      all_nodes.remove(self.op.node_name)
4270
    except ValueError:
4271
      logging.warning("Node '%s', which is about to be removed, was not found"
4272
                      " in the list of all nodes", self.op.node_name)
4273
    return (all_nodes, all_nodes)
4274

    
4275
  def CheckPrereq(self):
4276
    """Check prerequisites.
4277

4278
    This checks:
4279
     - the node exists in the configuration
4280
     - it does not have primary or secondary instances
4281
     - it's not the master
4282

4283
    Any errors are signaled by raising errors.OpPrereqError.
4284

4285
    """
4286
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4287
    node = self.cfg.GetNodeInfo(self.op.node_name)
4288
    assert node is not None
4289

    
4290
    masternode = self.cfg.GetMasterNode()
4291
    if node.name == masternode:
4292
      raise errors.OpPrereqError("Node is the master node, failover to another"
4293
                                 " node is required", errors.ECODE_INVAL)
4294

    
4295
    for instance_name, instance in self.cfg.GetAllInstancesInfo():
4296
      if node.name in instance.all_nodes:
4297
        raise errors.OpPrereqError("Instance %s is still running on the node,"
4298
                                   " please remove first" % instance_name,
4299
                                   errors.ECODE_INVAL)
4300
    self.op.node_name = node.name
4301
    self.node = node
4302

    
4303
  def Exec(self, feedback_fn):
4304
    """Removes the node from the cluster.
4305

4306
    """
4307
    node = self.node
4308
    logging.info("Stopping the node daemon and removing configs from node %s",
4309
                 node.name)
4310

    
4311
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4312

    
4313
    # Promote nodes to master candidate as needed
4314
    _AdjustCandidatePool(self, exceptions=[node.name])
4315
    self.context.RemoveNode(node.name)
4316

    
4317
    # Run post hooks on the node before it's removed
4318
    _RunPostHook(self, node.name)
4319

    
4320
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4321
    msg = result.fail_msg
4322
    if msg:
4323
      self.LogWarning("Errors encountered on the remote node while leaving"
4324
                      " the cluster: %s", msg)
4325

    
4326
    # Remove node from our /etc/hosts
4327
    if self.cfg.GetClusterInfo().modify_etc_hosts:
4328
      master_node = self.cfg.GetMasterNode()
4329
      result = self.rpc.call_etc_hosts_modify(master_node,
4330
                                              constants.ETC_HOSTS_REMOVE,
4331
                                              node.name, None)
4332
      result.Raise("Can't update hosts file with new host data")
4333
      _RedistributeAncillaryFiles(self)
4334

    
4335

    
4336
class _NodeQuery(_QueryBase):
4337
  FIELDS = query.NODE_FIELDS
4338

    
4339
  def ExpandNames(self, lu):
4340
    lu.needed_locks = {}
4341
    lu.share_locks[locking.LEVEL_NODE] = 1
4342

    
4343
    if self.names:
4344
      self.wanted = _GetWantedNodes(lu, self.names)
4345
    else:
4346
      self.wanted = locking.ALL_SET
4347

    
4348
    self.do_locking = (self.use_locking and
4349
                       query.NQ_LIVE in self.requested_data)
4350

    
4351
    if self.do_locking:
4352
      # if we don't request only static fields, we need to lock the nodes
4353
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4354

    
4355
  def DeclareLocks(self, lu, level):
4356
    pass
4357

    
4358
  def _GetQueryData(self, lu):
4359
    """Computes the list of nodes and their attributes.
4360

4361
    """
4362
    all_info = lu.cfg.GetAllNodesInfo()
4363

    
4364
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4365

    
4366
    # Gather data as requested
4367
    if query.NQ_LIVE in self.requested_data:
4368
      # filter out non-vm_capable nodes
4369
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4370

    
4371
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4372
                                        lu.cfg.GetHypervisorType())
4373
      live_data = dict((name, nresult.payload)
4374
                       for (name, nresult) in node_data.items()
4375
                       if not nresult.fail_msg and nresult.payload)
4376
    else:
4377
      live_data = None
4378

    
4379
    if query.NQ_INST in self.requested_data:
4380
      node_to_primary = dict([(name, set()) for name in nodenames])
4381
      node_to_secondary = dict([(name, set()) for name in nodenames])
4382

    
4383
      inst_data = lu.cfg.GetAllInstancesInfo()
4384

    
4385
      for inst in inst_data.values():
4386
        if inst.primary_node in node_to_primary:
4387
          node_to_primary[inst.primary_node].add(inst.name)
4388
        for secnode in inst.secondary_nodes:
4389
          if secnode in node_to_secondary:
4390
            node_to_secondary[secnode].add(inst.name)
4391
    else:
4392
      node_to_primary = None
4393
      node_to_secondary = None
4394

    
4395
    if query.NQ_OOB in self.requested_data:
4396
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4397
                         for name, node in all_info.iteritems())
4398
    else:
4399
      oob_support = None
4400

    
4401
    if query.NQ_GROUP in self.requested_data:
4402
      groups = lu.cfg.GetAllNodeGroupsInfo()
4403
    else:
4404
      groups = {}
4405

    
4406
    return query.NodeQueryData([all_info[name] for name in nodenames],
4407
                               live_data, lu.cfg.GetMasterNode(),
4408
                               node_to_primary, node_to_secondary, groups,
4409
                               oob_support, lu.cfg.GetClusterInfo())
4410

    
4411

    
4412
class LUNodeQuery(NoHooksLU):
4413
  """Logical unit for querying nodes.
4414

4415
  """
4416
  # pylint: disable-msg=W0142
4417
  REQ_BGL = False
4418

    
4419
  def CheckArguments(self):
4420
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4421
                         self.op.output_fields, self.op.use_locking)
4422

    
4423
  def ExpandNames(self):
4424
    self.nq.ExpandNames(self)
4425

    
4426
  def Exec(self, feedback_fn):
4427
    return self.nq.OldStyleQuery(self)
4428

    
4429

    
4430
class LUNodeQueryvols(NoHooksLU):
4431
  """Logical unit for getting volumes on node(s).
4432

4433
  """
4434
  REQ_BGL = False
4435
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4436
  _FIELDS_STATIC = utils.FieldSet("node")
4437

    
4438
  def CheckArguments(self):
4439
    _CheckOutputFields(static=self._FIELDS_STATIC,
4440
                       dynamic=self._FIELDS_DYNAMIC,
4441
                       selected=self.op.output_fields)
4442

    
4443
  def ExpandNames(self):
4444
    self.needed_locks = {}
4445
    self.share_locks[locking.LEVEL_NODE] = 1
4446
    if not self.op.nodes:
4447
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4448
    else:
4449
      self.needed_locks[locking.LEVEL_NODE] = \
4450
        _GetWantedNodes(self, self.op.nodes)
4451

    
4452
  def Exec(self, feedback_fn):
4453
    """Computes the list of nodes and their attributes.
4454

4455
    """
4456
    nodenames = self.owned_locks(locking.LEVEL_NODE)
4457
    volumes = self.rpc.call_node_volumes(nodenames)
4458

    
4459
    ilist = self.cfg.GetAllInstancesInfo()
4460
    vol2inst = _MapInstanceDisksToNodes(ilist.values())
4461

    
4462
    output = []
4463
    for node in nodenames:
4464
      nresult = volumes[node]
4465
      if nresult.offline:
4466
        continue
4467
      msg = nresult.fail_msg
4468
      if msg:
4469
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4470
        continue
4471

    
4472
      node_vols = sorted(nresult.payload,
4473
                         key=operator.itemgetter("dev"))
4474

    
4475
      for vol in node_vols:
4476
        node_output = []
4477
        for field in self.op.output_fields:
4478
          if field == "node":
4479
            val = node
4480
          elif field == "phys":
4481
            val = vol["dev"]
4482
          elif field == "vg":
4483
            val = vol["vg"]
4484
          elif field == "name":
4485
            val = vol["name"]
4486
          elif field == "size":
4487
            val = int(float(vol["size"]))
4488
          elif field == "instance":
4489
            val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4490
          else:
4491
            raise errors.ParameterError(field)
4492
          node_output.append(str(val))
4493

    
4494
        output.append(node_output)
4495

    
4496
    return output
4497

    
4498

    
4499
class LUNodeQueryStorage(NoHooksLU):
4500
  """Logical unit for getting information on storage units on node(s).
4501

4502
  """
4503
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4504
  REQ_BGL = False
4505

    
4506
  def CheckArguments(self):
4507
    _CheckOutputFields(static=self._FIELDS_STATIC,
4508
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4509
                       selected=self.op.output_fields)
4510

    
4511
  def ExpandNames(self):
4512
    self.needed_locks = {}
4513
    self.share_locks[locking.LEVEL_NODE] = 1
4514

    
4515
    if self.op.nodes:
4516
      self.needed_locks[locking.LEVEL_NODE] = \
4517
        _GetWantedNodes(self, self.op.nodes)
4518
    else:
4519
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4520

    
4521
  def Exec(self, feedback_fn):
4522
    """Computes the list of nodes and their attributes.
4523

4524
    """
4525
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
4526

    
4527
    # Always get name to sort by
4528
    if constants.SF_NAME in self.op.output_fields:
4529
      fields = self.op.output_fields[:]
4530
    else:
4531
      fields = [constants.SF_NAME] + self.op.output_fields
4532

    
4533
    # Never ask for node or type as it's only known to the LU
4534
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
4535
      while extra in fields:
4536
        fields.remove(extra)
4537

    
4538
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4539
    name_idx = field_idx[constants.SF_NAME]
4540

    
4541
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4542
    data = self.rpc.call_storage_list(self.nodes,
4543
                                      self.op.storage_type, st_args,
4544
                                      self.op.name, fields)
4545

    
4546
    result = []
4547

    
4548
    for node in utils.NiceSort(self.nodes):
4549
      nresult = data[node]
4550
      if nresult.offline:
4551
        continue
4552

    
4553
      msg = nresult.fail_msg
4554
      if msg:
4555
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4556
        continue
4557

    
4558
      rows = dict([(row[name_idx], row) for row in nresult.payload])
4559

    
4560
      for name in utils.NiceSort(rows.keys()):
4561
        row = rows[name]
4562

    
4563
        out = []
4564

    
4565
        for field in self.op.output_fields:
4566
          if field == constants.SF_NODE:
4567
            val = node
4568
          elif field == constants.SF_TYPE:
4569
            val = self.op.storage_type
4570
          elif field in field_idx:
4571
            val = row[field_idx[field]]
4572
          else:
4573
            raise errors.ParameterError(field)
4574

    
4575
          out.append(val)
4576

    
4577
        result.append(out)
4578

    
4579
    return result
4580

    
4581

    
4582
class _InstanceQuery(_QueryBase):
4583
  FIELDS = query.INSTANCE_FIELDS
4584

    
4585
  def ExpandNames(self, lu):
4586
    lu.needed_locks = {}
4587
    lu.share_locks = _ShareAll()
4588

    
4589
    if self.names:
4590
      self.wanted = _GetWantedInstances(lu, self.names)
4591
    else:
4592
      self.wanted = locking.ALL_SET
4593

    
4594
    self.do_locking = (self.use_locking and
4595
                       query.IQ_LIVE in self.requested_data)
4596
    if self.do_locking:
4597
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4598
      lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4599
      lu.needed_locks[locking.LEVEL_NODE] = []
4600
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4601

    
4602
    self.do_grouplocks = (self.do_locking and
4603
                          query.IQ_NODES in self.requested_data)
4604

    
4605
  def DeclareLocks(self, lu, level):
4606
    if self.do_locking:
4607
      if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4608
        assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4609

    
4610
        # Lock all groups used by instances optimistically; this requires going
4611
        # via the node before it's locked, requiring verification later on
4612
        lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4613
          set(group_uuid
4614
              for instance_name in lu.owned_locks(locking.LEVEL_INSTANCE)
4615
              for group_uuid in lu.cfg.GetInstanceNodeGroups(instance_name))
4616
      elif level == locking.LEVEL_NODE:
4617
        lu._LockInstancesNodes() # pylint: disable-msg=W0212
4618

    
4619
  @staticmethod
4620
  def _CheckGroupLocks(lu):
4621
    owned_instances = frozenset(lu.owned_locks(locking.LEVEL_INSTANCE))
4622
    owned_groups = frozenset(lu.owned_locks(locking.LEVEL_NODEGROUP))
4623

    
4624
    # Check if node groups for locked instances are still correct
4625
    for instance_name in owned_instances:
4626
      _CheckInstanceNodeGroups(lu.cfg, instance_name, owned_groups)
4627

    
4628
  def _GetQueryData(self, lu):
4629
    """Computes the list of instances and their attributes.
4630

4631
    """
4632
    if self.do_grouplocks:
4633
      self._CheckGroupLocks(lu)
4634

    
4635
    cluster = lu.cfg.GetClusterInfo()
4636
    all_info = lu.cfg.GetAllInstancesInfo()
4637

    
4638
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4639

    
4640
    instance_list = [all_info[name] for name in instance_names]
4641
    nodes = frozenset(itertools.chain(*(inst.all_nodes
4642
                                        for inst in instance_list)))
4643
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4644
    bad_nodes = []
4645
    offline_nodes = []
4646
    wrongnode_inst = set()
4647

    
4648
    # Gather data as requested
4649
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4650
      live_data = {}
4651
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4652
      for name in nodes:
4653
        result = node_data[name]
4654
        if result.offline:
4655
          # offline nodes will be in both lists
4656
          assert result.fail_msg
4657
          offline_nodes.append(name)
4658
        if result.fail_msg:
4659
          bad_nodes.append(name)
4660
        elif result.payload:
4661
          for inst in result.payload:
4662
            if inst in all_info:
4663
              if all_info[inst].primary_node == name:
4664
                live_data.update(result.payload)
4665
              else:
4666
                wrongnode_inst.add(inst)
4667
            else:
4668
              # orphan instance; we don't list it here as we don't
4669
              # handle this case yet in the output of instance listing
4670
              logging.warning("Orphan instance '%s' found on node %s",
4671
                              inst, name)
4672
        # else no instance is alive
4673
    else:
4674
      live_data = {}
4675

    
4676
    if query.IQ_DISKUSAGE in self.requested_data:
4677
      disk_usage = dict((inst.name,
4678
                         _ComputeDiskSize(inst.disk_template,
4679
                                          [{constants.IDISK_SIZE: disk.size}
4680
                                           for disk in inst.disks]))
4681
                        for inst in instance_list)
4682
    else:
4683
      disk_usage = None
4684

    
4685
    if query.IQ_CONSOLE in self.requested_data:
4686
      consinfo = {}
4687
      for inst in instance_list:
4688
        if inst.name in live_data:
4689
          # Instance is running
4690
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4691
        else:
4692
          consinfo[inst.name] = None
4693
      assert set(consinfo.keys()) == set(instance_names)
4694
    else:
4695
      consinfo = None
4696

    
4697
    if query.IQ_NODES in self.requested_data:
4698
      node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4699
                                            instance_list)))
4700
      nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4701
      groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4702
                    for uuid in set(map(operator.attrgetter("group"),
4703
                                        nodes.values())))
4704
    else:
4705
      nodes = None
4706
      groups = None
4707

    
4708
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4709
                                   disk_usage, offline_nodes, bad_nodes,
4710
                                   live_data, wrongnode_inst, consinfo,
4711
                                   nodes, groups)
4712

    
4713

    
4714
class LUQuery(NoHooksLU):
4715
  """Query for resources/items of a certain kind.
4716

4717
  """
4718
  # pylint: disable-msg=W0142
4719
  REQ_BGL = False
4720

    
4721
  def CheckArguments(self):
4722
    qcls = _GetQueryImplementation(self.op.what)
4723

    
4724
    self.impl = qcls(self.op.filter, self.op.fields, False)
4725

    
4726
  def ExpandNames(self):
4727
    self.impl.ExpandNames(self)
4728

    
4729
  def DeclareLocks(self, level):
4730
    self.impl.DeclareLocks(self, level)
4731

    
4732
  def Exec(self, feedback_fn):
4733
    return self.impl.NewStyleQuery(self)
4734

    
4735

    
4736
class LUQueryFields(NoHooksLU):
4737
  """Query for resources/items of a certain kind.
4738

4739
  """
4740
  # pylint: disable-msg=W0142
4741
  REQ_BGL = False
4742

    
4743
  def CheckArguments(self):
4744
    self.qcls = _GetQueryImplementation(self.op.what)
4745

    
4746
  def ExpandNames(self):
4747
    self.needed_locks = {}
4748

    
4749
  def Exec(self, feedback_fn):
4750
    return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4751

    
4752

    
4753
class LUNodeModifyStorage(NoHooksLU):
4754
  """Logical unit for modifying a storage volume on a node.
4755

4756
  """
4757
  REQ_BGL = False
4758

    
4759
  def CheckArguments(self):
4760
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4761

    
4762
    storage_type = self.op.storage_type
4763

    
4764
    try:
4765
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4766
    except KeyError:
4767
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4768
                                 " modified" % storage_type,
4769
                                 errors.ECODE_INVAL)
4770

    
4771
    diff = set(self.op.changes.keys()) - modifiable
4772
    if diff:
4773
      raise errors.OpPrereqError("The following fields can not be modified for"
4774
                                 " storage units of type '%s': %r" %
4775
                                 (storage_type, list(diff)),
4776
                                 errors.ECODE_INVAL)
4777

    
4778
  def ExpandNames(self):
4779
    self.needed_locks = {
4780
      locking.LEVEL_NODE: self.op.node_name,
4781
      }
4782

    
4783
  def Exec(self, feedback_fn):
4784
    """Computes the list of nodes and their attributes.
4785

4786
    """
4787
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4788
    result = self.rpc.call_storage_modify(self.op.node_name,
4789
                                          self.op.storage_type, st_args,
4790
                                          self.op.name, self.op.changes)
4791
    result.Raise("Failed to modify storage unit '%s' on %s" %
4792
                 (self.op.name, self.op.node_name))
4793

    
4794

    
4795
class LUNodeAdd(LogicalUnit):
4796
  """Logical unit for adding node to the cluster.
4797

4798
  """
4799
  HPATH = "node-add"
4800
  HTYPE = constants.HTYPE_NODE
4801
  _NFLAGS = ["master_capable", "vm_capable"]
4802

    
4803
  def CheckArguments(self):
4804
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4805
    # validate/normalize the node name
4806
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4807
                                         family=self.primary_ip_family)
4808
    self.op.node_name = self.hostname.name
4809

    
4810
    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4811
      raise errors.OpPrereqError("Cannot readd the master node",
4812
                                 errors.ECODE_STATE)
4813

    
4814
    if self.op.readd and self.op.group:
4815
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4816
                                 " being readded", errors.ECODE_INVAL)
4817

    
4818
  def BuildHooksEnv(self):
4819
    """Build hooks env.
4820

4821
    This will run on all nodes before, and on all nodes + the new node after.
4822

4823
    """
4824
    return {
4825
      "OP_TARGET": self.op.node_name,
4826
      "NODE_NAME": self.op.node_name,
4827
      "NODE_PIP": self.op.primary_ip,
4828
      "NODE_SIP": self.op.secondary_ip,
4829
      "MASTER_CAPABLE": str(self.op.master_capable),
4830
      "VM_CAPABLE": str(self.op.vm_capable),
4831
      }
4832

    
4833
  def BuildHooksNodes(self):
4834
    """Build hooks nodes.
4835

4836
    """
4837
    # Exclude added node
4838
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4839
    post_nodes = pre_nodes + [self.op.node_name, ]
4840

    
4841
    return (pre_nodes, post_nodes)
4842

    
4843
  def CheckPrereq(self):
4844
    """Check prerequisites.
4845

4846
    This checks:
4847
     - the new node is not already in the config
4848
     - it is resolvable
4849
     - its parameters (single/dual homed) matches the cluster
4850

4851
    Any errors are signaled by raising errors.OpPrereqError.
4852

4853
    """
4854
    cfg = self.cfg
4855
    hostname = self.hostname
4856
    node = hostname.name
4857
    primary_ip = self.op.primary_ip = hostname.ip
4858
    if self.op.secondary_ip is None:
4859
      if self.primary_ip_family == netutils.IP6Address.family:
4860
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4861
                                   " IPv4 address must be given as secondary",
4862
                                   errors.ECODE_INVAL)
4863
      self.op.secondary_ip = primary_ip
4864

    
4865
    secondary_ip = self.op.secondary_ip
4866
    if not netutils.IP4Address.IsValid(secondary_ip):
4867
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4868
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4869

    
4870
    node_list = cfg.GetNodeList()
4871
    if not self.op.readd and node in node_list:
4872
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4873
                                 node, errors.ECODE_EXISTS)
4874
    elif self.op.readd and node not in node_list:
4875
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4876
                                 errors.ECODE_NOENT)
4877

    
4878
    self.changed_primary_ip = False
4879

    
4880
    for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4881
      if self.op.readd and node == existing_node_name:
4882
        if existing_node.secondary_ip != secondary_ip:
4883
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4884
                                     " address configuration as before",
4885
                                     errors.ECODE_INVAL)
4886
        if existing_node.primary_ip != primary_ip:
4887
          self.changed_primary_ip = True
4888

    
4889
        continue
4890

    
4891
      if (existing_node.primary_ip == primary_ip or
4892
          existing_node.secondary_ip == primary_ip or
4893
          existing_node.primary_ip == secondary_ip or
4894
          existing_node.secondary_ip == secondary_ip):
4895
        raise errors.OpPrereqError("New node ip address(es) conflict with"
4896
                                   " existing node %s" % existing_node.name,
4897
                                   errors.ECODE_NOTUNIQUE)
4898

    
4899
    # After this 'if' block, None is no longer a valid value for the
4900
    # _capable op attributes
4901
    if self.op.readd:
4902
      old_node = self.cfg.GetNodeInfo(node)
4903
      assert old_node is not None, "Can't retrieve locked node %s" % node
4904
      for attr in self._NFLAGS:
4905
        if getattr(self.op, attr) is None:
4906
          setattr(self.op, attr, getattr(old_node, attr))
4907
    else:
4908
      for attr in self._NFLAGS:
4909
        if getattr(self.op, attr) is None:
4910
          setattr(self.op, attr, True)
4911

    
4912
    if self.op.readd and not self.op.vm_capable:
4913
      pri, sec = cfg.GetNodeInstances(node)
4914
      if pri or sec:
4915
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4916
                                   " flag set to false, but it already holds"
4917
                                   " instances" % node,
4918
                                   errors.ECODE_STATE)
4919

    
4920
    # check that the type of the node (single versus dual homed) is the
4921
    # same as for the master
4922
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4923
    master_singlehomed = myself.secondary_ip == myself.primary_ip
4924
    newbie_singlehomed = secondary_ip == primary_ip
4925
    if master_singlehomed != newbie_singlehomed:
4926
      if master_singlehomed:
4927
        raise errors.OpPrereqError("The master has no secondary ip but the"
4928
                                   " new node has one",
4929
                                   errors.ECODE_INVAL)
4930
      else:
4931
        raise errors.OpPrereqError("The master has a secondary ip but the"
4932
                                   " new node doesn't have one",
4933
                                   errors.ECODE_INVAL)
4934

    
4935
    # checks reachability
4936
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4937
      raise errors.OpPrereqError("Node not reachable by ping",
4938
                                 errors.ECODE_ENVIRON)
4939

    
4940
    if not newbie_singlehomed:
4941
      # check reachability from my secondary ip to newbie's secondary ip
4942
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4943
                           source=myself.secondary_ip):
4944
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4945
                                   " based ping to node daemon port",
4946
                                   errors.ECODE_ENVIRON)
4947

    
4948
    if self.op.readd:
4949
      exceptions = [node]
4950
    else:
4951
      exceptions = []
4952

    
4953
    if self.op.master_capable:
4954
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4955
    else:
4956
      self.master_candidate = False
4957

    
4958
    if self.op.readd:
4959
      self.new_node = old_node
4960
    else:
4961
      node_group = cfg.LookupNodeGroup(self.op.group)
4962
      self.new_node = objects.Node(name=node,
4963
                                   primary_ip=primary_ip,
4964
                                   secondary_ip=secondary_ip,
4965
                                   master_candidate=self.master_candidate,
4966
                                   offline=False, drained=False,
4967
                                   group=node_group)
4968

    
4969
    if self.op.ndparams:
4970
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4971

    
4972
  def Exec(self, feedback_fn):
4973
    """Adds the new node to the cluster.
4974

4975
    """
4976
    new_node = self.new_node
4977
    node = new_node.name
4978

    
4979
    # We adding a new node so we assume it's powered
4980
    new_node.powered = True
4981

    
4982
    # for re-adds, reset the offline/drained/master-candidate flags;
4983
    # we need to reset here, otherwise offline would prevent RPC calls
4984
    # later in the procedure; this also means that if the re-add
4985
    # fails, we are left with a non-offlined, broken node
4986
    if self.op.readd:
4987
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4988
      self.LogInfo("Readding a node, the offline/drained flags were reset")
4989
      # if we demote the node, we do cleanup later in the procedure
4990
      new_node.master_candidate = self.master_candidate
4991
      if self.changed_primary_ip:
4992
        new_node.primary_ip = self.op.primary_ip
4993

    
4994
    # copy the master/vm_capable flags
4995
    for attr in self._NFLAGS:
4996
      setattr(new_node, attr, getattr(self.op, attr))
4997

    
4998
    # notify the user about any possible mc promotion
4999
    if new_node.master_candidate:
5000
      self.LogInfo("Node will be a master candidate")
5001

    
5002
    if self.op.ndparams:
5003
      new_node.ndparams = self.op.ndparams
5004
    else:
5005
      new_node.ndparams = {}
5006

    
5007
    # check connectivity
5008
    result = self.rpc.call_version([node])[node]
5009
    result.Raise("Can't get version information from node %s" % node)
5010
    if constants.PROTOCOL_VERSION == result.payload:
5011
      logging.info("Communication to node %s fine, sw version %s match",
5012
                   node, result.payload)
5013
    else:
5014
      raise errors.OpExecError("Version mismatch master version %s,"
5015
                               " node version %s" %
5016
                               (constants.PROTOCOL_VERSION, result.payload))
5017

    
5018
    # Add node to our /etc/hosts, and add key to known_hosts
5019
    if self.cfg.GetClusterInfo().modify_etc_hosts:
5020
      master_node = self.cfg.GetMasterNode()
5021
      result = self.rpc.call_etc_hosts_modify(master_node,
5022
                                              constants.ETC_HOSTS_ADD,
5023
                                              self.hostname.name,
5024
                                              self.hostname.ip)
5025
      result.Raise("Can't update hosts file with new host data")
5026

    
5027
    if new_node.secondary_ip != new_node.primary_ip:
5028
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
5029
                               False)
5030

    
5031
    node_verify_list = [self.cfg.GetMasterNode()]
5032
    node_verify_param = {
5033
      constants.NV_NODELIST: [node],
5034
      # TODO: do a node-net-test as well?
5035
    }
5036

    
5037
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
5038
                                       self.cfg.GetClusterName())
5039
    for verifier in node_verify_list:
5040
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
5041
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
5042
      if nl_payload:
5043
        for failed in nl_payload:
5044
          feedback_fn("ssh/hostname verification failed"
5045
                      " (checking from %s): %s" %
5046
                      (verifier, nl_payload[failed]))
5047
        raise errors.OpExecError("ssh/hostname verification failed")
5048

    
5049
    if self.op.readd:
5050
      _RedistributeAncillaryFiles(self)
5051
      self.context.ReaddNode(new_node)
5052
      # make sure we redistribute the config
5053
      self.cfg.Update(new_node, feedback_fn)
5054
      # and make sure the new node will not have old files around
5055
      if not new_node.master_candidate:
5056
        result = self.rpc.call_node_demote_from_mc(new_node.name)
5057
        msg = result.fail_msg
5058
        if msg:
5059
          self.LogWarning("Node failed to demote itself from master"
5060
                          " candidate status: %s" % msg)
5061
    else:
5062
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
5063
                                  additional_vm=self.op.vm_capable)
5064
      self.context.AddNode(new_node, self.proc.GetECId())
5065

    
5066

    
5067
class LUNodeSetParams(LogicalUnit):
5068
  """Modifies the parameters of a node.
5069

5070
  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5071
      to the node role (as _ROLE_*)
5072
  @cvar _R2F: a dictionary from node role to tuples of flags
5073
  @cvar _FLAGS: a list of attribute names corresponding to the flags
5074

5075
  """
5076
  HPATH = "node-modify"
5077
  HTYPE = constants.HTYPE_NODE
5078
  REQ_BGL = False
5079
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5080
  _F2R = {
5081
    (True, False, False): _ROLE_CANDIDATE,
5082
    (False, True, False): _ROLE_DRAINED,
5083
    (False, False, True): _ROLE_OFFLINE,
5084
    (False, False, False): _ROLE_REGULAR,
5085
    }
5086
  _R2F = dict((v, k) for k, v in _F2R.items())
5087
  _FLAGS = ["master_candidate", "drained", "offline"]
5088

    
5089
  def CheckArguments(self):
5090
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5091
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5092
                self.op.master_capable, self.op.vm_capable,
5093
                self.op.secondary_ip, self.op.ndparams]
5094
    if all_mods.count(None) == len(all_mods):
5095
      raise errors.OpPrereqError("Please pass at least one modification",
5096
                                 errors.ECODE_INVAL)
5097
    if all_mods.count(True) > 1:
5098
      raise errors.OpPrereqError("Can't set the node into more than one"
5099
                                 " state at the same time",
5100
                                 errors.ECODE_INVAL)
5101

    
5102
    # Boolean value that tells us whether we might be demoting from MC
5103
    self.might_demote = (self.op.master_candidate == False or
5104
                         self.op.offline == True or
5105
                         self.op.drained == True or
5106
                         self.op.master_capable == False)
5107

    
5108
    if self.op.secondary_ip:
5109
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5110
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5111
                                   " address" % self.op.secondary_ip,
5112
                                   errors.ECODE_INVAL)
5113

    
5114
    self.lock_all = self.op.auto_promote and self.might_demote
5115
    self.lock_instances = self.op.secondary_ip is not None
5116

    
5117
  def ExpandNames(self):
5118
    if self.lock_all:
5119
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5120
    else:
5121
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5122

    
5123
    if self.lock_instances:
5124
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5125

    
5126
  def DeclareLocks(self, level):
5127
    # If we have locked all instances, before waiting to lock nodes, release
5128
    # all the ones living on nodes unrelated to the current operation.
5129
    if level == locking.LEVEL_NODE and self.lock_instances:
5130
      self.affected_instances = []
5131
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5132
        instances_keep = []
5133

    
5134
        # Build list of instances to release
5135
        locked_i = self.owned_locks(locking.LEVEL_INSTANCE)
5136
        for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5137
          if (instance.disk_template in constants.DTS_INT_MIRROR and
5138
              self.op.node_name in instance.all_nodes):
5139
            instances_keep.append(instance_name)
5140
            self.affected_instances.append(instance)
5141

    
5142
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5143

    
5144
        assert (set(self.owned_locks(locking.LEVEL_INSTANCE)) ==
5145
                set(instances_keep))
5146

    
5147
  def BuildHooksEnv(self):
5148
    """Build hooks env.
5149

5150
    This runs on the master node.
5151

5152
    """
5153
    return {
5154
      "OP_TARGET": self.op.node_name,
5155
      "MASTER_CANDIDATE": str(self.op.master_candidate),
5156
      "OFFLINE": str(self.op.offline),
5157
      "DRAINED": str(self.op.drained),
5158
      "MASTER_CAPABLE": str(self.op.master_capable),
5159
      "VM_CAPABLE": str(self.op.vm_capable),
5160
      }
5161

    
5162
  def BuildHooksNodes(self):
5163
    """Build hooks nodes.
5164

5165
    """
5166
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
5167
    return (nl, nl)
5168

    
5169
  def CheckPrereq(self):
5170
    """Check prerequisites.
5171

5172
    This only checks the instance list against the existing names.
5173

5174
    """
5175
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5176

    
5177
    if (self.op.master_candidate is not None or
5178
        self.op.drained is not None or
5179
        self.op.offline is not None):
5180
      # we can't change the master's node flags
5181
      if self.op.node_name == self.cfg.GetMasterNode():
5182
        raise errors.OpPrereqError("The master role can be changed"
5183
                                   " only via master-failover",
5184
                                   errors.ECODE_INVAL)
5185

    
5186
    if self.op.master_candidate and not node.master_capable:
5187
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5188
                                 " it a master candidate" % node.name,
5189
                                 errors.ECODE_STATE)
5190

    
5191
    if self.op.vm_capable == False:
5192
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5193
      if ipri or isec:
5194
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5195
                                   " the vm_capable flag" % node.name,
5196
                                   errors.ECODE_STATE)
5197

    
5198
    if node.master_candidate and self.might_demote and not self.lock_all:
5199
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
5200
      # check if after removing the current node, we're missing master
5201
      # candidates
5202
      (mc_remaining, mc_should, _) = \
5203
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5204
      if mc_remaining < mc_should:
5205
        raise errors.OpPrereqError("Not enough master candidates, please"
5206
                                   " pass auto promote option to allow"
5207
                                   " promotion", errors.ECODE_STATE)
5208

    
5209
    self.old_flags = old_flags = (node.master_candidate,
5210
                                  node.drained, node.offline)
5211
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5212
    self.old_role = old_role = self._F2R[old_flags]
5213

    
5214
    # Check for ineffective changes
5215
    for attr in self._FLAGS:
5216
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5217
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5218
        setattr(self.op, attr, None)
5219

    
5220
    # Past this point, any flag change to False means a transition
5221
    # away from the respective state, as only real changes are kept
5222

    
5223
    # TODO: We might query the real power state if it supports OOB
5224
    if _SupportsOob(self.cfg, node):
5225
      if self.op.offline is False and not (node.powered or
5226
                                           self.op.powered == True):
5227
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5228
                                    " offline status can be reset") %
5229
                                   self.op.node_name)
5230
    elif self.op.powered is not None:
5231
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
5232
                                  " as it does not support out-of-band"
5233
                                  " handling") % self.op.node_name)
5234

    
5235
    # If we're being deofflined/drained, we'll MC ourself if needed
5236
    if (self.op.drained == False or self.op.offline == False or
5237
        (self.op.master_capable and not node.master_capable)):
5238
      if _DecideSelfPromotion(self):
5239
        self.op.master_candidate = True
5240
        self.LogInfo("Auto-promoting node to master candidate")
5241

    
5242
    # If we're no longer master capable, we'll demote ourselves from MC
5243
    if self.op.master_capable == False and node.master_candidate:
5244
      self.LogInfo("Demoting from master candidate")
5245
      self.op.master_candidate = False
5246

    
5247
    # Compute new role
5248
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5249
    if self.op.master_candidate:
5250
      new_role = self._ROLE_CANDIDATE
5251
    elif self.op.drained:
5252
      new_role = self._ROLE_DRAINED
5253
    elif self.op.offline:
5254
      new_role = self._ROLE_OFFLINE
5255
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5256
      # False is still in new flags, which means we're un-setting (the
5257
      # only) True flag
5258
      new_role = self._ROLE_REGULAR
5259
    else: # no new flags, nothing, keep old role
5260
      new_role = old_role
5261

    
5262
    self.new_role = new_role
5263

    
5264
    if old_role == self._ROLE_OFFLINE and new_role != old_role:
5265
      # Trying to transition out of offline status
5266
      result = self.rpc.call_version([node.name])[node.name]
5267
      if result.fail_msg:
5268
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5269
                                   " to report its version: %s" %
5270
                                   (node.name, result.fail_msg),
5271
                                   errors.ECODE_STATE)
5272
      else:
5273
        self.LogWarning("Transitioning node from offline to online state"
5274
                        " without using re-add. Please make sure the node"
5275
                        " is healthy!")
5276

    
5277
    if self.op.secondary_ip:
5278
      # Ok even without locking, because this can't be changed by any LU
5279
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5280
      master_singlehomed = master.secondary_ip == master.primary_ip
5281
      if master_singlehomed and self.op.secondary_ip:
5282
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5283
                                   " homed cluster", errors.ECODE_INVAL)
5284

    
5285
      if node.offline:
5286
        if self.affected_instances:
5287
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
5288
                                     " node has instances (%s) configured"
5289
                                     " to use it" % self.affected_instances)
5290
      else:
5291
        # On online nodes, check that no instances are running, and that
5292
        # the node has the new ip and we can reach it.
5293
        for instance in self.affected_instances:
5294
          _CheckInstanceDown(self, instance, "cannot change secondary ip")
5295

    
5296
        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5297
        if master.name != node.name:
5298
          # check reachability from master secondary ip to new secondary ip
5299
          if not netutils.TcpPing(self.op.secondary_ip,
5300
                                  constants.DEFAULT_NODED_PORT,
5301
                                  source=master.secondary_ip):
5302
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5303
                                       " based ping to node daemon port",
5304
                                       errors.ECODE_ENVIRON)
5305

    
5306
    if self.op.ndparams:
5307
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5308
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5309
      self.new_ndparams = new_ndparams
5310

    
5311
  def Exec(self, feedback_fn):
5312
    """Modifies a node.
5313

5314
    """
5315
    node = self.node
5316
    old_role = self.old_role
5317
    new_role = self.new_role
5318

    
5319
    result = []
5320

    
5321
    if self.op.ndparams:
5322
      node.ndparams = self.new_ndparams
5323

    
5324
    if self.op.powered is not None:
5325
      node.powered = self.op.powered
5326

    
5327
    for attr in ["master_capable", "vm_capable"]:
5328
      val = getattr(self.op, attr)
5329
      if val is not None:
5330
        setattr(node, attr, val)
5331
        result.append((attr, str(val)))
5332

    
5333
    if new_role != old_role:
5334
      # Tell the node to demote itself, if no longer MC and not offline
5335
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5336
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5337
        if msg:
5338
          self.LogWarning("Node failed to demote itself: %s", msg)
5339

    
5340
      new_flags = self._R2F[new_role]
5341
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5342
        if of != nf:
5343
          result.append((desc, str(nf)))
5344
      (node.master_candidate, node.drained, node.offline) = new_flags
5345

    
5346
      # we locked all nodes, we adjust the CP before updating this node
5347
      if self.lock_all:
5348
        _AdjustCandidatePool(self, [node.name])
5349

    
5350
    if self.op.secondary_ip:
5351
      node.secondary_ip = self.op.secondary_ip
5352
      result.append(("secondary_ip", self.op.secondary_ip))
5353

    
5354
    # this will trigger configuration file update, if needed
5355
    self.cfg.Update(node, feedback_fn)
5356

    
5357
    # this will trigger job queue propagation or cleanup if the mc
5358
    # flag changed
5359
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5360
      self.context.ReaddNode(node)
5361

    
5362
    return result
5363

    
5364

    
5365
class LUNodePowercycle(NoHooksLU):
5366
  """Powercycles a node.
5367

5368
  """
5369
  REQ_BGL = False
5370

    
5371
  def CheckArguments(self):
5372
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5373
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5374
      raise errors.OpPrereqError("The node is the master and the force"
5375
                                 " parameter was not set",
5376
                                 errors.ECODE_INVAL)
5377

    
5378
  def ExpandNames(self):
5379
    """Locking for PowercycleNode.
5380

5381
    This is a last-resort option and shouldn't block on other
5382
    jobs. Therefore, we grab no locks.
5383

5384
    """
5385
    self.needed_locks = {}
5386

    
5387
  def Exec(self, feedback_fn):
5388
    """Reboots a node.
5389

5390
    """
5391
    result = self.rpc.call_node_powercycle(self.op.node_name,
5392
                                           self.cfg.GetHypervisorType())
5393
    result.Raise("Failed to schedule the reboot")
5394
    return result.payload
5395

    
5396

    
5397
class LUClusterQuery(NoHooksLU):
5398
  """Query cluster configuration.
5399

5400
  """
5401
  REQ_BGL = False
5402

    
5403
  def ExpandNames(self):
5404
    self.needed_locks = {}
5405

    
5406
  def Exec(self, feedback_fn):
5407
    """Return cluster config.
5408

5409
    """
5410
    cluster = self.cfg.GetClusterInfo()
5411
    os_hvp = {}
5412

    
5413
    # Filter just for enabled hypervisors
5414
    for os_name, hv_dict in cluster.os_hvp.items():
5415
      os_hvp[os_name] = {}
5416
      for hv_name, hv_params in hv_dict.items():
5417
        if hv_name in cluster.enabled_hypervisors:
5418
          os_hvp[os_name][hv_name] = hv_params
5419

    
5420
    # Convert ip_family to ip_version
5421
    primary_ip_version = constants.IP4_VERSION
5422
    if cluster.primary_ip_family == netutils.IP6Address.family:
5423
      primary_ip_version = constants.IP6_VERSION
5424

    
5425
    result = {
5426
      "software_version": constants.RELEASE_VERSION,
5427
      "protocol_version": constants.PROTOCOL_VERSION,
5428
      "config_version": constants.CONFIG_VERSION,
5429
      "os_api_version": max(constants.OS_API_VERSIONS),
5430
      "export_version": constants.EXPORT_VERSION,
5431
      "architecture": (platform.architecture()[0], platform.machine()),
5432
      "name": cluster.cluster_name,
5433
      "master": cluster.master_node,
5434
      "default_hypervisor": cluster.enabled_hypervisors[0],
5435
      "enabled_hypervisors": cluster.enabled_hypervisors,
5436
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5437
                        for hypervisor_name in cluster.enabled_hypervisors]),
5438
      "os_hvp": os_hvp,
5439
      "beparams": cluster.beparams,
5440
      "osparams": cluster.osparams,
5441
      "nicparams": cluster.nicparams,
5442
      "ndparams": cluster.ndparams,
5443
      "candidate_pool_size": cluster.candidate_pool_size,
5444
      "master_netdev": cluster.master_netdev,
5445
      "volume_group_name": cluster.volume_group_name,
5446
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
5447
      "file_storage_dir": cluster.file_storage_dir,
5448
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
5449
      "maintain_node_health": cluster.maintain_node_health,
5450
      "ctime": cluster.ctime,
5451
      "mtime": cluster.mtime,
5452
      "uuid": cluster.uuid,
5453
      "tags": list(cluster.GetTags()),
5454
      "uid_pool": cluster.uid_pool,
5455
      "default_iallocator": cluster.default_iallocator,
5456
      "reserved_lvs": cluster.reserved_lvs,
5457
      "primary_ip_version": primary_ip_version,
5458
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5459
      "hidden_os": cluster.hidden_os,
5460
      "blacklisted_os": cluster.blacklisted_os,
5461
      }
5462

    
5463
    return result
5464

    
5465

    
5466
class LUClusterConfigQuery(NoHooksLU):
5467
  """Return configuration values.
5468

5469
  """
5470
  REQ_BGL = False
5471
  _FIELDS_DYNAMIC = utils.FieldSet()
5472
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5473
                                  "watcher_pause", "volume_group_name")
5474

    
5475
  def CheckArguments(self):
5476
    _CheckOutputFields(static=self._FIELDS_STATIC,
5477
                       dynamic=self._FIELDS_DYNAMIC,
5478
                       selected=self.op.output_fields)
5479

    
5480
  def ExpandNames(self):
5481
    self.needed_locks = {}
5482

    
5483
  def Exec(self, feedback_fn):
5484
    """Dump a representation of the cluster config to the standard output.
5485

5486
    """
5487
    values = []
5488
    for field in self.op.output_fields:
5489
      if field == "cluster_name":
5490
        entry = self.cfg.GetClusterName()
5491
      elif field == "master_node":
5492
        entry = self.cfg.GetMasterNode()
5493
      elif field == "drain_flag":
5494
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5495
      elif field == "watcher_pause":
5496
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5497
      elif field == "volume_group_name":
5498
        entry = self.cfg.GetVGName()
5499
      else:
5500
        raise errors.ParameterError(field)
5501
      values.append(entry)
5502
    return values
5503

    
5504

    
5505
class LUInstanceActivateDisks(NoHooksLU):
5506
  """Bring up an instance's disks.
5507

5508
  """
5509
  REQ_BGL = False
5510

    
5511
  def ExpandNames(self):
5512
    self._ExpandAndLockInstance()
5513
    self.needed_locks[locking.LEVEL_NODE] = []
5514
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5515

    
5516
  def DeclareLocks(self, level):
5517
    if level == locking.LEVEL_NODE:
5518
      self._LockInstancesNodes()
5519

    
5520
  def CheckPrereq(self):
5521
    """Check prerequisites.
5522

5523
    This checks that the instance is in the cluster.
5524

5525
    """
5526
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5527
    assert self.instance is not None, \
5528
      "Cannot retrieve locked instance %s" % self.op.instance_name
5529
    _CheckNodeOnline(self, self.instance.primary_node)
5530

    
5531
  def Exec(self, feedback_fn):
5532
    """Activate the disks.
5533

5534
    """
5535
    disks_ok, disks_info = \
5536
              _AssembleInstanceDisks(self, self.instance,
5537
                                     ignore_size=self.op.ignore_size)
5538
    if not disks_ok:
5539
      raise errors.OpExecError("Cannot activate block devices")
5540

    
5541
    return disks_info
5542

    
5543

    
5544
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5545
                           ignore_size=False):
5546
  """Prepare the block devices for an instance.
5547

5548
  This sets up the block devices on all nodes.
5549

5550
  @type lu: L{LogicalUnit}
5551
  @param lu: the logical unit on whose behalf we execute
5552
  @type instance: L{objects.Instance}
5553
  @param instance: the instance for whose disks we assemble
5554
  @type disks: list of L{objects.Disk} or None
5555
  @param disks: which disks to assemble (or all, if None)
5556
  @type ignore_secondaries: boolean
5557
  @param ignore_secondaries: if true, errors on secondary nodes
5558
      won't result in an error return from the function
5559
  @type ignore_size: boolean
5560
  @param ignore_size: if true, the current known size of the disk
5561
      will not be used during the disk activation, useful for cases
5562
      when the size is wrong
5563
  @return: False if the operation failed, otherwise a list of
5564
      (host, instance_visible_name, node_visible_name)
5565
      with the mapping from node devices to instance devices
5566

5567
  """
5568
  device_info = []
5569
  disks_ok = True
5570
  iname = instance.name
5571
  disks = _ExpandCheckDisks(instance, disks)
5572

    
5573
  # With the two passes mechanism we try to reduce the window of
5574
  # opportunity for the race condition of switching DRBD to primary
5575
  # before handshaking occured, but we do not eliminate it
5576

    
5577
  # The proper fix would be to wait (with some limits) until the
5578
  # connection has been made and drbd transitions from WFConnection
5579
  # into any other network-connected state (Connected, SyncTarget,
5580
  # SyncSource, etc.)
5581

    
5582
  # 1st pass, assemble on all nodes in secondary mode
5583
  for idx, inst_disk in enumerate(disks):
5584
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5585
      if ignore_size:
5586
        node_disk = node_disk.Copy()
5587
        node_disk.UnsetSize()
5588
      lu.cfg.SetDiskID(node_disk, node)
5589
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5590
      msg = result.fail_msg
5591
      if msg:
5592
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5593
                           " (is_primary=False, pass=1): %s",
5594
                           inst_disk.iv_name, node, msg)
5595
        if not ignore_secondaries:
5596
          disks_ok = False
5597

    
5598
  # FIXME: race condition on drbd migration to primary
5599

    
5600
  # 2nd pass, do only the primary node
5601
  for idx, inst_disk in enumerate(disks):
5602
    dev_path = None
5603

    
5604
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5605
      if node != instance.primary_node:
5606
        continue
5607
      if ignore_size:
5608
        node_disk = node_disk.Copy()
5609
        node_disk.UnsetSize()
5610
      lu.cfg.SetDiskID(node_disk, node)
5611
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5612
      msg = result.fail_msg
5613
      if msg:
5614
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
5615
                           " (is_primary=True, pass=2): %s",
5616
                           inst_disk.iv_name, node, msg)
5617
        disks_ok = False
5618
      else:
5619
        dev_path = result.payload
5620

    
5621
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5622

    
5623
  # leave the disks configured for the primary node
5624
  # this is a workaround that would be fixed better by
5625
  # improving the logical/physical id handling
5626
  for disk in disks:
5627
    lu.cfg.SetDiskID(disk, instance.primary_node)
5628

    
5629
  return disks_ok, device_info
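
# Illustrative sketch (not part of the original module): how a caller might
# consume the (disks_ok, device_info) pair returned above; the variable names
# are hypothetical, while _StartInstanceDisks below is the real in-tree caller.
#
#   disks_ok, device_info = _AssembleInstanceDisks(lu, instance)
#   if not disks_ok:
#     raise errors.OpExecError("Cannot activate block devices")
#   for node, iv_name, dev_path in device_info:
#     lu.LogInfo("Disk %s of %s is visible on %s as %s",
#                iv_name, instance.name, node, dev_path)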


def _StartInstanceDisks(lu, instance, force):
5633
  """Start the disks of an instance.
5634

5635
  """
5636
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5637
                                           ignore_secondaries=force)
5638
  if not disks_ok:
5639
    _ShutdownInstanceDisks(lu, instance)
5640
    if force is not None and not force:
5641
      lu.proc.LogWarning("", hint="If the message above refers to a"
5642
                         " secondary node,"
5643
                         " you can retry the operation using '--force'.")
5644
    raise errors.OpExecError("Disk consistency error")
5645

    
5646

    
5647
class LUInstanceDeactivateDisks(NoHooksLU):
5648
  """Shutdown an instance's disks.
5649

5650
  """
5651
  REQ_BGL = False
5652

    
5653
  def ExpandNames(self):
5654
    self._ExpandAndLockInstance()
5655
    self.needed_locks[locking.LEVEL_NODE] = []
5656
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5657

    
5658
  def DeclareLocks(self, level):
5659
    if level == locking.LEVEL_NODE:
5660
      self._LockInstancesNodes()
5661

    
5662
  def CheckPrereq(self):
5663
    """Check prerequisites.
5664

5665
    This checks that the instance is in the cluster.
5666

5667
    """
5668
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5669
    assert self.instance is not None, \
5670
      "Cannot retrieve locked instance %s" % self.op.instance_name
5671

    
5672
  def Exec(self, feedback_fn):
5673
    """Deactivate the disks
5674

5675
    """
5676
    instance = self.instance
5677
    if self.op.force:
5678
      _ShutdownInstanceDisks(self, instance)
5679
    else:
5680
      _SafeShutdownInstanceDisks(self, instance)
5681

    
5682

    
5683
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5684
  """Shutdown block devices of an instance.
5685

5686
  This function checks if an instance is running, before calling
5687
  _ShutdownInstanceDisks.
5688

5689
  """
5690
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5691
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5692

    
5693

    
5694
def _ExpandCheckDisks(instance, disks):
5695
  """Return the instance disks selected by the disks list
5696

5697
  @type disks: list of L{objects.Disk} or None
5698
  @param disks: selected disks
5699
  @rtype: list of L{objects.Disk}
5700
  @return: selected instance disks to act on
5701

5702
  """
5703
  if disks is None:
5704
    return instance.disks
5705
  else:
5706
    if not set(disks).issubset(instance.disks):
5707
      raise errors.ProgrammerError("Can only act on disks belonging to the"
5708
                                   " target instance")
5709
    return disks
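
# Illustrative sketch (not part of the original module): callers either pass
# disks=None to act on every disk of the instance, or a subset of
# instance.disks, e.g. (hypothetical call sites):
#
#   _SafeShutdownInstanceDisks(lu, instance)                      # all disks
#   _SafeShutdownInstanceDisks(lu, instance, disks=instance.disks[:1])
#
# Disk objects that do not belong to the instance trigger the ProgrammerError
# raised above.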


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5713
  """Shutdown block devices of an instance.
5714

5715
  This does the shutdown on all nodes of the instance.
5716

5717
  If ignore_primary is true, errors on the primary node are
  ignored.
5719

5720
  """
5721
  all_result = True
5722
  disks = _ExpandCheckDisks(instance, disks)
5723

    
5724
  for disk in disks:
5725
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5726
      lu.cfg.SetDiskID(top_disk, node)
5727
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5728
      msg = result.fail_msg
5729
      if msg:
5730
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5731
                      disk.iv_name, node, msg)
5732
        if ((node == instance.primary_node and not ignore_primary) or
5733
            (node != instance.primary_node and not result.offline)):
5734
          all_result = False
5735
  return all_result
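
# Illustrative sketch (not part of the original module): a hypothetical caller
# that tolerates errors on the primary node, relying on the boolean result to
# report problems on the remaining (online) nodes only.
#
#   if not _ShutdownInstanceDisks(lu, instance, ignore_primary=True):
#     lu.LogWarning("Some block devices could not be shut down cleanly")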


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5739
  """Checks if a node has enough free memory.
5740

5741
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5745

5746
  @type lu: C{LogicalUnit}
5747
  @param lu: a logical unit from which we get configuration data
5748
  @type node: C{str}
5749
  @param node: the node to check
5750
  @type reason: C{str}
5751
  @param reason: string to use in the error message
5752
  @type requested: C{int}
5753
  @param requested: the amount of memory in MiB to check for
5754
  @type hypervisor_name: C{str}
5755
  @param hypervisor_name: the hypervisor to ask for memory stats
5756
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5757
      we cannot check the node
5758

5759
  """
5760
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5761
  nodeinfo[node].Raise("Can't get data from node %s" % node,
5762
                       prereq=True, ecode=errors.ECODE_ENVIRON)
5763
  free_mem = nodeinfo[node].payload.get("memory_free", None)
5764
  if not isinstance(free_mem, int):
5765
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5766
                               " was '%s'" % (node, free_mem),
5767
                               errors.ECODE_ENVIRON)
5768
  if requested > free_mem:
5769
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5770
                               " needed %s MiB, available %s MiB" %
5771
                               (node, reason, requested, free_mem),
5772
                               errors.ECODE_NORES)
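
# Illustrative sketch (not part of the original module): a typical call checks
# the instance's configured memory on its primary node before starting it,
# mirroring LUInstanceStartup.CheckPrereq below ("lu" and "instance" are
# hypothetical local names).
#
#   bep = lu.cfg.GetClusterInfo().FillBE(instance)
#   _CheckNodeFreeMemory(lu, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)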


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5776
  """Checks if nodes have enough free disk space in the all VGs.
5777

5778
  This function check if all given nodes have the needed amount of
5779
  free disk. In case any node has less disk or we cannot get the
5780
  information from the node, this function raise an OpPrereqError
5781
  exception.
5782

5783
  @type lu: C{LogicalUnit}
5784
  @param lu: a logical unit from which we get configuration data
5785
  @type nodenames: C{list}
5786
  @param nodenames: the list of node names to check
5787
  @type req_sizes: C{dict}
5788
  @param req_sizes: the hash of vg and corresponding amount of disk in
5789
      MiB to check for
5790
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5791
      or we cannot check the node
5792

5793
  """
5794
  for vg, req_size in req_sizes.items():
5795
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
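
# Illustrative sketch (not part of the original module): req_sizes maps each
# volume group name to the total space in MiB needed on it; the VG names and
# sizes below are hypothetical.
#
#   _CheckNodesFreeDiskPerVG(lu, ["node1.example.com", "node2.example.com"],
#                            {"xenvg": 10240, "datavg": 2048})
#
# Each volume group is then checked on every listed node via
# _CheckNodesFreeDiskOnVG.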


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5799
  """Checks if nodes have enough free disk space in the specified VG.
5800

5801
  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
5805

5806
  @type lu: C{LogicalUnit}
5807
  @param lu: a logical unit from which we get configuration data
5808
  @type nodenames: C{list}
5809
  @param nodenames: the list of node names to check
5810
  @type vg: C{str}
5811
  @param vg: the volume group to check
5812
  @type requested: C{int}
5813
  @param requested: the amount of disk in MiB to check for
5814
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
5815
      or we cannot check the node
5816

5817
  """
5818
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5819
  for node in nodenames:
5820
    info = nodeinfo[node]
5821
    info.Raise("Cannot get current information from node %s" % node,
5822
               prereq=True, ecode=errors.ECODE_ENVIRON)
5823
    vg_free = info.payload.get("vg_free", None)
5824
    if not isinstance(vg_free, int):
5825
      raise errors.OpPrereqError("Can't compute free disk space on node"
5826
                                 " %s for vg %s, result was '%s'" %
5827
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
5828
    if requested > vg_free:
5829
      raise errors.OpPrereqError("Not enough disk space on target node %s"
5830
                                 " vg %s: required %d MiB, available %d MiB" %
5831
                                 (node, vg, requested, vg_free),
5832
                                 errors.ECODE_NORES)
5833

    
5834

    
5835
class LUInstanceStartup(LogicalUnit):
5836
  """Starts an instance.
5837

5838
  """
5839
  HPATH = "instance-start"
5840
  HTYPE = constants.HTYPE_INSTANCE
5841
  REQ_BGL = False
5842

    
5843
  def CheckArguments(self):
5844
    # extra beparams
5845
    if self.op.beparams:
5846
      # fill the beparams dict
5847
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5848

    
5849
  def ExpandNames(self):
5850
    self._ExpandAndLockInstance()
5851

    
5852
  def BuildHooksEnv(self):
5853
    """Build hooks env.
5854

5855
    This runs on master, primary and secondary nodes of the instance.
5856

5857
    """
5858
    env = {
5859
      "FORCE": self.op.force,
5860
      }
5861

    
5862
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5863

    
5864
    return env
5865

    
5866
  def BuildHooksNodes(self):
5867
    """Build hooks nodes.
5868

5869
    """
5870
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5871
    return (nl, nl)
5872

    
5873
  def CheckPrereq(self):
5874
    """Check prerequisites.
5875

5876
    This checks that the instance is in the cluster.
5877

5878
    """
5879
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5880
    assert self.instance is not None, \
5881
      "Cannot retrieve locked instance %s" % self.op.instance_name
5882

    
5883
    # extra hvparams
5884
    if self.op.hvparams:
5885
      # check hypervisor parameter syntax (locally)
5886
      cluster = self.cfg.GetClusterInfo()
5887
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5888
      filled_hvp = cluster.FillHV(instance)
5889
      filled_hvp.update(self.op.hvparams)
5890
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5891
      hv_type.CheckParameterSyntax(filled_hvp)
5892
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5893

    
5894
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5895

    
5896
    if self.primary_offline and self.op.ignore_offline_nodes:
5897
      self.proc.LogWarning("Ignoring offline primary node")
5898

    
5899
      if self.op.hvparams or self.op.beparams:
5900
        self.proc.LogWarning("Overridden parameters are ignored")
5901
    else:
5902
      _CheckNodeOnline(self, instance.primary_node)
5903

    
5904
      bep = self.cfg.GetClusterInfo().FillBE(instance)
5905

    
5906
      # check bridges existence
5907
      _CheckInstanceBridgesExist(self, instance)
5908

    
5909
      remote_info = self.rpc.call_instance_info(instance.primary_node,
5910
                                                instance.name,
5911
                                                instance.hypervisor)
5912
      remote_info.Raise("Error checking node %s" % instance.primary_node,
5913
                        prereq=True, ecode=errors.ECODE_ENVIRON)
5914
      if not remote_info.payload: # not running already
5915
        _CheckNodeFreeMemory(self, instance.primary_node,
5916
                             "starting instance %s" % instance.name,
5917
                             bep[constants.BE_MEMORY], instance.hypervisor)
5918

    
5919
  def Exec(self, feedback_fn):
5920
    """Start the instance.
5921

5922
    """
5923
    instance = self.instance
5924
    force = self.op.force
5925

    
5926
    if not self.op.no_remember:
5927
      self.cfg.MarkInstanceUp(instance.name)
5928

    
5929
    if self.primary_offline:
5930
      assert self.op.ignore_offline_nodes
5931
      self.proc.LogInfo("Primary node offline, marked instance as started")
5932
    else:
5933
      node_current = instance.primary_node
5934

    
5935
      _StartInstanceDisks(self, instance, force)
5936

    
5937
      result = self.rpc.call_instance_start(node_current, instance,
5938
                                            self.op.hvparams, self.op.beparams,
5939
                                            self.op.startup_paused)
5940
      msg = result.fail_msg
5941
      if msg:
5942
        _ShutdownInstanceDisks(self, instance)
5943
        raise errors.OpExecError("Could not start instance: %s" % msg)
5944

    
5945

    
5946
class LUInstanceReboot(LogicalUnit):
5947
  """Reboot an instance.
5948

5949
  """
5950
  HPATH = "instance-reboot"
5951
  HTYPE = constants.HTYPE_INSTANCE
5952
  REQ_BGL = False
5953

    
5954
  def ExpandNames(self):
5955
    self._ExpandAndLockInstance()
5956

    
5957
  def BuildHooksEnv(self):
5958
    """Build hooks env.
5959

5960
    This runs on master, primary and secondary nodes of the instance.
5961

5962
    """
5963
    env = {
5964
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5965
      "REBOOT_TYPE": self.op.reboot_type,
5966
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5967
      }
5968

    
5969
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5970

    
5971
    return env
5972

    
5973
  def BuildHooksNodes(self):
5974
    """Build hooks nodes.
5975

5976
    """
5977
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5978
    return (nl, nl)
5979

    
5980
  def CheckPrereq(self):
5981
    """Check prerequisites.
5982

5983
    This checks that the instance is in the cluster.
5984

5985
    """
5986
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5987
    assert self.instance is not None, \
5988
      "Cannot retrieve locked instance %s" % self.op.instance_name
5989

    
5990
    _CheckNodeOnline(self, instance.primary_node)
5991

    
5992
    # check bridges existence
5993
    _CheckInstanceBridgesExist(self, instance)
5994

    
5995
  def Exec(self, feedback_fn):
5996
    """Reboot the instance.
5997

5998
    """
5999
    instance = self.instance
6000
    ignore_secondaries = self.op.ignore_secondaries
6001
    reboot_type = self.op.reboot_type
6002

    
6003
    remote_info = self.rpc.call_instance_info(instance.primary_node,
6004
                                              instance.name,
6005
                                              instance.hypervisor)
6006
    remote_info.Raise("Error checking node %s" % instance.primary_node)
6007
    instance_running = bool(remote_info.payload)
6008

    
6009
    node_current = instance.primary_node
6010

    
6011
    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
6012
                                            constants.INSTANCE_REBOOT_HARD]:
6013
      for disk in instance.disks:
6014
        self.cfg.SetDiskID(disk, node_current)
6015
      result = self.rpc.call_instance_reboot(node_current, instance,
6016
                                             reboot_type,
6017
                                             self.op.shutdown_timeout)
6018
      result.Raise("Could not reboot instance")
6019
    else:
6020
      if instance_running:
6021
        result = self.rpc.call_instance_shutdown(node_current, instance,
6022
                                                 self.op.shutdown_timeout)
6023
        result.Raise("Could not shutdown instance for full reboot")
6024
        _ShutdownInstanceDisks(self, instance)
6025
      else:
6026
        self.LogInfo("Instance %s was already stopped, starting now",
6027
                     instance.name)
6028
      _StartInstanceDisks(self, instance, ignore_secondaries)
6029
      result = self.rpc.call_instance_start(node_current, instance,
6030
                                            None, None, False)
6031
      msg = result.fail_msg
6032
      if msg:
6033
        _ShutdownInstanceDisks(self, instance)
6034
        raise errors.OpExecError("Could not start instance for"
6035
                                 " full reboot: %s" % msg)
6036

    
6037
    self.cfg.MarkInstanceUp(instance.name)
6038

    
6039

    
6040
class LUInstanceShutdown(LogicalUnit):
6041
  """Shutdown an instance.
6042

6043
  """
6044
  HPATH = "instance-stop"
6045
  HTYPE = constants.HTYPE_INSTANCE
6046
  REQ_BGL = False
6047

    
6048
  def ExpandNames(self):
6049
    self._ExpandAndLockInstance()
6050

    
6051
  def BuildHooksEnv(self):
6052
    """Build hooks env.
6053

6054
    This runs on master, primary and secondary nodes of the instance.
6055

6056
    """
6057
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6058
    env["TIMEOUT"] = self.op.timeout
6059
    return env
6060

    
6061
  def BuildHooksNodes(self):
6062
    """Build hooks nodes.
6063

6064
    """
6065
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6066
    return (nl, nl)
6067

    
6068
  def CheckPrereq(self):
6069
    """Check prerequisites.
6070

6071
    This checks that the instance is in the cluster.
6072

6073
    """
6074
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6075
    assert self.instance is not None, \
6076
      "Cannot retrieve locked instance %s" % self.op.instance_name
6077

    
6078
    self.primary_offline = \
6079
      self.cfg.GetNodeInfo(self.instance.primary_node).offline
6080

    
6081
    if self.primary_offline and self.op.ignore_offline_nodes:
6082
      self.proc.LogWarning("Ignoring offline primary node")
6083
    else:
6084
      _CheckNodeOnline(self, self.instance.primary_node)
6085

    
6086
  def Exec(self, feedback_fn):
6087
    """Shutdown the instance.
6088

6089
    """
6090
    instance = self.instance
6091
    node_current = instance.primary_node
6092
    timeout = self.op.timeout
6093

    
6094
    if not self.op.no_remember:
6095
      self.cfg.MarkInstanceDown(instance.name)
6096

    
6097
    if self.primary_offline:
6098
      assert self.op.ignore_offline_nodes
6099
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
6100
    else:
6101
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6102
      msg = result.fail_msg
6103
      if msg:
6104
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6105

    
6106
      _ShutdownInstanceDisks(self, instance)
6107

    
6108

    
6109
class LUInstanceReinstall(LogicalUnit):
6110
  """Reinstall an instance.
6111

6112
  """
6113
  HPATH = "instance-reinstall"
6114
  HTYPE = constants.HTYPE_INSTANCE
6115
  REQ_BGL = False
6116

    
6117
  def ExpandNames(self):
6118
    self._ExpandAndLockInstance()
6119

    
6120
  def BuildHooksEnv(self):
6121
    """Build hooks env.
6122

6123
    This runs on master, primary and secondary nodes of the instance.
6124

6125
    """
6126
    return _BuildInstanceHookEnvByObject(self, self.instance)
6127

    
6128
  def BuildHooksNodes(self):
6129
    """Build hooks nodes.
6130

6131
    """
6132
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6133
    return (nl, nl)
6134

    
6135
  def CheckPrereq(self):
6136
    """Check prerequisites.
6137

6138
    This checks that the instance is in the cluster and is not running.
6139

6140
    """
6141
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6142
    assert instance is not None, \
6143
      "Cannot retrieve locked instance %s" % self.op.instance_name
6144
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6145
                     " offline, cannot reinstall")
6146
    for node in instance.secondary_nodes:
6147
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
6148
                       " cannot reinstall")
6149

    
6150
    if instance.disk_template == constants.DT_DISKLESS:
6151
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6152
                                 self.op.instance_name,
6153
                                 errors.ECODE_INVAL)
6154
    _CheckInstanceDown(self, instance, "cannot reinstall")
6155

    
6156
    if self.op.os_type is not None:
6157
      # OS verification
6158
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6159
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6160
      instance_os = self.op.os_type
6161
    else:
6162
      instance_os = instance.os
6163

    
6164
    nodelist = list(instance.all_nodes)
6165

    
6166
    if self.op.osparams:
6167
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6168
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6169
      self.os_inst = i_osdict # the new dict (without defaults)
6170
    else:
6171
      self.os_inst = None
6172

    
6173
    self.instance = instance
6174

    
6175
  def Exec(self, feedback_fn):
6176
    """Reinstall the instance.
6177

6178
    """
6179
    inst = self.instance
6180

    
6181
    if self.op.os_type is not None:
6182
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6183
      inst.os = self.op.os_type
6184
      # Write to configuration
6185
      self.cfg.Update(inst, feedback_fn)
6186

    
6187
    _StartInstanceDisks(self, inst, None)
6188
    try:
6189
      feedback_fn("Running the instance OS create scripts...")
6190
      # FIXME: pass debug option from opcode to backend
6191
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6192
                                             self.op.debug_level,
6193
                                             osparams=self.os_inst)
6194
      result.Raise("Could not install OS for instance %s on node %s" %
6195
                   (inst.name, inst.primary_node))
6196
    finally:
6197
      _ShutdownInstanceDisks(self, inst)
6198

    
6199

    
6200
class LUInstanceRecreateDisks(LogicalUnit):
6201
  """Recreate an instance's missing disks.
6202

6203
  """
6204
  HPATH = "instance-recreate-disks"
6205
  HTYPE = constants.HTYPE_INSTANCE
6206
  REQ_BGL = False
6207

    
6208
  def CheckArguments(self):
6209
    # normalise the disk list
6210
    self.op.disks = sorted(frozenset(self.op.disks))
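    # Illustrative note (not part of the original module): frozenset() plus
    # sorted() deduplicates and orders the user-supplied disk indices, e.g. a
    # hypothetical input of [2, 0, 2] becomes [0, 2].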
  def ExpandNames(self):
6213
    self._ExpandAndLockInstance()
6214
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6215
    if self.op.nodes:
6216
      self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6217
      self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6218
    else:
6219
      self.needed_locks[locking.LEVEL_NODE] = []
6220

    
6221
  def DeclareLocks(self, level):
6222
    if level == locking.LEVEL_NODE:
6223
      # if we replace the nodes, we only need to lock the old primary,
6224
      # otherwise we need to lock all nodes for disk re-creation
6225
      primary_only = bool(self.op.nodes)
6226
      self._LockInstancesNodes(primary_only=primary_only)
6227

    
6228
  def BuildHooksEnv(self):
6229
    """Build hooks env.
6230

6231
    This runs on master, primary and secondary nodes of the instance.
6232

6233
    """
6234
    return _BuildInstanceHookEnvByObject(self, self.instance)
6235

    
6236
  def BuildHooksNodes(self):
6237
    """Build hooks nodes.
6238

6239
    """
6240
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6241
    return (nl, nl)
6242

    
6243
  def CheckPrereq(self):
6244
    """Check prerequisites.
6245

6246
    This checks that the instance is in the cluster and is not running.
6247

6248
    """
6249
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6250
    assert instance is not None, \
6251
      "Cannot retrieve locked instance %s" % self.op.instance_name
6252
    if self.op.nodes:
6253
      if len(self.op.nodes) != len(instance.all_nodes):
6254
        raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6255
                                   " %d replacement nodes were specified" %
6256
                                   (instance.name, len(instance.all_nodes),
6257
                                    len(self.op.nodes)),
6258
                                   errors.ECODE_INVAL)
6259
      assert instance.disk_template != constants.DT_DRBD8 or \
6260
          len(self.op.nodes) == 2
6261
      assert instance.disk_template != constants.DT_PLAIN or \
6262
          len(self.op.nodes) == 1
6263
      primary_node = self.op.nodes[0]
6264
    else:
6265
      primary_node = instance.primary_node
6266
    _CheckNodeOnline(self, primary_node)
6267

    
6268
    if instance.disk_template == constants.DT_DISKLESS:
6269
      raise errors.OpPrereqError("Instance '%s' has no disks" %
6270
                                 self.op.instance_name, errors.ECODE_INVAL)
6271
    # if we replace nodes *and* the old primary is offline, we don't
6272
    # check
6273
    assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6274
    old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6275
    if not (self.op.nodes and old_pnode.offline):
6276
      _CheckInstanceDown(self, instance, "cannot recreate disks")
6277

    
6278
    if not self.op.disks:
6279
      self.op.disks = range(len(instance.disks))
6280
    else:
6281
      for idx in self.op.disks:
6282
        if idx >= len(instance.disks):
6283
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6284
                                     errors.ECODE_INVAL)
6285
    if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6286
      raise errors.OpPrereqError("Can't recreate disks partially and"
6287
                                 " change the nodes at the same time",
6288
                                 errors.ECODE_INVAL)
6289
    self.instance = instance
6290

    
6291
  def Exec(self, feedback_fn):
6292
    """Recreate the disks.
6293

6294
    """
6295
    instance = self.instance
6296

    
6297
    to_skip = []
6298
    mods = [] # keeps track of needed logical_id changes
6299

    
6300
    for idx, disk in enumerate(instance.disks):
6301
      if idx not in self.op.disks: # disk idx has not been passed in
6302
        to_skip.append(idx)
6303
        continue
6304
      # update secondaries for disks, if needed
6305
      if self.op.nodes:
6306
        if disk.dev_type == constants.LD_DRBD8:
6307
          # need to update the nodes and minors
6308
          assert len(self.op.nodes) == 2
6309
          assert len(disk.logical_id) == 6 # otherwise disk internals
6310
                                           # have changed
6311
          (_, _, old_port, _, _, old_secret) = disk.logical_id
6312
          new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6313
          new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6314
                    new_minors[0], new_minors[1], old_secret)
6315
          assert len(disk.logical_id) == len(new_id)
6316
          mods.append((idx, new_id))
6317

    
6318
    # now that we have passed all asserts above, we can apply the mods
6319
    # in a single run (to avoid partial changes)
6320
    for idx, new_id in mods:
6321
      instance.disks[idx].logical_id = new_id
6322

    
6323
    # change primary node, if needed
6324
    if self.op.nodes:
6325
      instance.primary_node = self.op.nodes[0]
6326
      self.LogWarning("Changing the instance's nodes, you will have to"
6327
                      " remove any disks left on the older nodes manually")
6328

    
6329
    if self.op.nodes:
6330
      self.cfg.Update(instance, feedback_fn)
6331

    
6332
    _CreateDisks(self, instance, to_skip=to_skip)
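
# Illustrative note (not part of the original module): as the unpacking in
# LUInstanceRecreateDisks.Exec above shows, a DRBD8 disk's logical_id is the
# 6-tuple
#
#   (node_a, node_b, port, minor_a, minor_b, secret)
#
# so recreating disks on new nodes keeps the port and secret while swapping in
# the new node names and freshly allocated minors from cfg.AllocateDRBDMinor.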


class LUInstanceRename(LogicalUnit):
6336
  """Rename an instance.
6337

6338
  """
6339
  HPATH = "instance-rename"
6340
  HTYPE = constants.HTYPE_INSTANCE
6341

    
6342
  def CheckArguments(self):
6343
    """Check arguments.
6344

6345
    """
6346
    if self.op.ip_check and not self.op.name_check:
6347
      # TODO: make the ip check more flexible and not depend on the name check
6348
      raise errors.OpPrereqError("IP address check requires a name check",
6349
                                 errors.ECODE_INVAL)
6350

    
6351
  def BuildHooksEnv(self):
6352
    """Build hooks env.
6353

6354
    This runs on master, primary and secondary nodes of the instance.
6355

6356
    """
6357
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6358
    env["INSTANCE_NEW_NAME"] = self.op.new_name
6359
    return env
6360

    
6361
  def BuildHooksNodes(self):
6362
    """Build hooks nodes.
6363

6364
    """
6365
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6366
    return (nl, nl)
6367

    
6368
  def CheckPrereq(self):
6369
    """Check prerequisites.
6370

6371
    This checks that the instance is in the cluster and is not running.
6372

6373
    """
6374
    self.op.instance_name = _ExpandInstanceName(self.cfg,
6375
                                                self.op.instance_name)
6376
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6377
    assert instance is not None
6378
    _CheckNodeOnline(self, instance.primary_node)
6379
    _CheckInstanceDown(self, instance, "cannot rename")
6380
    self.instance = instance
6381

    
6382
    new_name = self.op.new_name
6383
    if self.op.name_check:
6384
      hostname = netutils.GetHostname(name=new_name)
6385
      if hostname != new_name:
6386
        self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6387
                     hostname.name)
6388
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6389
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6390
                                    " same as given hostname '%s'") %
6391
                                    (hostname.name, self.op.new_name),
6392
                                    errors.ECODE_INVAL)
6393
      new_name = self.op.new_name = hostname.name
6394
      if (self.op.ip_check and
6395
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6396
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
6397
                                   (hostname.ip, new_name),
6398
                                   errors.ECODE_NOTUNIQUE)
6399

    
6400
    instance_list = self.cfg.GetInstanceList()
6401
    if new_name in instance_list and new_name != instance.name:
6402
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6403
                                 new_name, errors.ECODE_EXISTS)
6404

    
6405
  def Exec(self, feedback_fn):
6406
    """Rename the instance.
6407

6408
    """
6409
    inst = self.instance
6410
    old_name = inst.name
6411

    
6412
    rename_file_storage = False
6413
    if (inst.disk_template in constants.DTS_FILEBASED and
6414
        self.op.new_name != inst.name):
6415
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6416
      rename_file_storage = True
6417

    
6418
    self.cfg.RenameInstance(inst.name, self.op.new_name)
6419
    # Change the instance lock. This is definitely safe while we hold the BGL.
6420
    # Otherwise the new lock would have to be added in acquired mode.
6421
    assert self.REQ_BGL
6422
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6423
    self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6424

    
6425
    # re-read the instance from the configuration after rename
6426
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
6427

    
6428
    if rename_file_storage:
6429
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6430
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6431
                                                     old_file_storage_dir,
6432
                                                     new_file_storage_dir)
6433
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
6434
                   " (but the instance has been renamed in Ganeti)" %
6435
                   (inst.primary_node, old_file_storage_dir,
6436
                    new_file_storage_dir))
6437

    
6438
    _StartInstanceDisks(self, inst, None)
6439
    try:
6440
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6441
                                                 old_name, self.op.debug_level)
6442
      msg = result.fail_msg
6443
      if msg:
6444
        msg = ("Could not run OS rename script for instance %s on node %s"
6445
               " (but the instance has been renamed in Ganeti): %s" %
6446
               (inst.name, inst.primary_node, msg))
6447
        self.proc.LogWarning(msg)
6448
    finally:
6449
      _ShutdownInstanceDisks(self, inst)
6450

    
6451
    return inst.name
6452

    
6453

    
6454
class LUInstanceRemove(LogicalUnit):
6455
  """Remove an instance.
6456

6457
  """
6458
  HPATH = "instance-remove"
6459
  HTYPE = constants.HTYPE_INSTANCE
6460
  REQ_BGL = False
6461

    
6462
  def ExpandNames(self):
6463
    self._ExpandAndLockInstance()
6464
    self.needed_locks[locking.LEVEL_NODE] = []
6465
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6466

    
6467
  def DeclareLocks(self, level):
6468
    if level == locking.LEVEL_NODE:
6469
      self._LockInstancesNodes()
6470

    
6471
  def BuildHooksEnv(self):
6472
    """Build hooks env.
6473

6474
    This runs on master, primary and secondary nodes of the instance.
6475

6476
    """
6477
    env = _BuildInstanceHookEnvByObject(self, self.instance)
6478
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6479
    return env
6480

    
6481
  def BuildHooksNodes(self):
6482
    """Build hooks nodes.
6483

6484
    """
6485
    nl = [self.cfg.GetMasterNode()]
6486
    nl_post = list(self.instance.all_nodes) + nl
6487
    return (nl, nl_post)
6488

    
6489
  def CheckPrereq(self):
6490
    """Check prerequisites.
6491

6492
    This checks that the instance is in the cluster.
6493

6494
    """
6495
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6496
    assert self.instance is not None, \
6497
      "Cannot retrieve locked instance %s" % self.op.instance_name
6498

    
6499
  def Exec(self, feedback_fn):
6500
    """Remove the instance.
6501

6502
    """
6503
    instance = self.instance
6504
    logging.info("Shutting down instance %s on node %s",
6505
                 instance.name, instance.primary_node)
6506

    
6507
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6508
                                             self.op.shutdown_timeout)
6509
    msg = result.fail_msg
6510
    if msg:
6511
      if self.op.ignore_failures:
6512
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
6513
      else:
6514
        raise errors.OpExecError("Could not shutdown instance %s on"
6515
                                 " node %s: %s" %
6516
                                 (instance.name, instance.primary_node, msg))
6517

    
6518
    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6519

    
6520

    
6521
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6522
  """Utility function to remove an instance.
6523

6524
  """
6525
  logging.info("Removing block devices for instance %s", instance.name)
6526

    
6527
  if not _RemoveDisks(lu, instance):
6528
    if not ignore_failures:
6529
      raise errors.OpExecError("Can't remove instance's disks")
6530
    feedback_fn("Warning: can't remove instance's disks")
6531

    
6532
  logging.info("Removing instance %s out of cluster config", instance.name)
6533

    
6534
  lu.cfg.RemoveInstance(instance.name)
6535

    
6536
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6537
    "Instance lock removal conflict"
6538

    
6539
  # Remove lock for the instance
6540
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6541

    
6542

    
6543
class LUInstanceQuery(NoHooksLU):
6544
  """Logical unit for querying instances.
6545

6546
  """
6547
  # pylint: disable-msg=W0142
6548
  REQ_BGL = False
6549

    
6550
  def CheckArguments(self):
6551
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6552
                             self.op.output_fields, self.op.use_locking)
6553

    
6554
  def ExpandNames(self):
6555
    self.iq.ExpandNames(self)
6556

    
6557
  def DeclareLocks(self, level):
6558
    self.iq.DeclareLocks(self, level)
6559

    
6560
  def Exec(self, feedback_fn):
6561
    return self.iq.OldStyleQuery(self)
6562

    
6563

    
6564
class LUInstanceFailover(LogicalUnit):
6565
  """Failover an instance.
6566

6567
  """
6568
  HPATH = "instance-failover"
6569
  HTYPE = constants.HTYPE_INSTANCE
6570
  REQ_BGL = False
6571

    
6572
  def CheckArguments(self):
6573
    """Check the arguments.
6574

6575
    """
6576
    self.iallocator = getattr(self.op, "iallocator", None)
6577
    self.target_node = getattr(self.op, "target_node", None)
6578

    
6579
  def ExpandNames(self):
6580
    self._ExpandAndLockInstance()
6581

    
6582
    if self.op.target_node is not None:
6583
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6584

    
6585
    self.needed_locks[locking.LEVEL_NODE] = []
6586
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6587

    
6588
    ignore_consistency = self.op.ignore_consistency
6589
    shutdown_timeout = self.op.shutdown_timeout
6590
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6591
                                       cleanup=False,
6592
                                       failover=True,
6593
                                       ignore_consistency=ignore_consistency,
6594
                                       shutdown_timeout=shutdown_timeout)
6595
    self.tasklets = [self._migrater]
6596

    
6597
  def DeclareLocks(self, level):
6598
    if level == locking.LEVEL_NODE:
6599
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6600
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6601
        if self.op.target_node is None:
6602
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6603
        else:
6604
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6605
                                                   self.op.target_node]
6606
        del self.recalculate_locks[locking.LEVEL_NODE]
6607
      else:
6608
        self._LockInstancesNodes()
6609

    
6610
  def BuildHooksEnv(self):
6611
    """Build hooks env.
6612

6613
    This runs on master, primary and secondary nodes of the instance.
6614

6615
    """
6616
    instance = self._migrater.instance
6617
    source_node = instance.primary_node
6618
    target_node = self.op.target_node
6619
    env = {
6620
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6621
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6622
      "OLD_PRIMARY": source_node,
6623
      "NEW_PRIMARY": target_node,
6624
      }
6625

    
6626
    if instance.disk_template in constants.DTS_INT_MIRROR:
6627
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6628
      env["NEW_SECONDARY"] = source_node
6629
    else:
6630
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6631

    
6632
    env.update(_BuildInstanceHookEnvByObject(self, instance))
6633

    
6634
    return env
6635

    
6636
  def BuildHooksNodes(self):
6637
    """Build hooks nodes.
6638

6639
    """
6640
    instance = self._migrater.instance
6641
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6642
    return (nl, nl + [instance.primary_node])
6643

    
6644

    
6645
class LUInstanceMigrate(LogicalUnit):
6646
  """Migrate an instance.
6647

6648
  This is migration without shutting down, compared to the failover,
6649
  which is done with shutdown.
6650

6651
  """
6652
  HPATH = "instance-migrate"
6653
  HTYPE = constants.HTYPE_INSTANCE
6654
  REQ_BGL = False
6655

    
6656
  def ExpandNames(self):
6657
    self._ExpandAndLockInstance()
6658

    
6659
    if self.op.target_node is not None:
6660
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6661

    
6662
    self.needed_locks[locking.LEVEL_NODE] = []
6663
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6664

    
6665
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
6666
                                       cleanup=self.op.cleanup,
6667
                                       failover=False,
6668
                                       fallback=self.op.allow_failover)
6669
    self.tasklets = [self._migrater]
6670

    
6671
  def DeclareLocks(self, level):
6672
    if level == locking.LEVEL_NODE:
6673
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6674
      if instance.disk_template in constants.DTS_EXT_MIRROR:
6675
        if self.op.target_node is None:
6676
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6677
        else:
6678
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6679
                                                   self.op.target_node]
6680
        del self.recalculate_locks[locking.LEVEL_NODE]
6681
      else:
6682
        self._LockInstancesNodes()
6683

    
6684
  def BuildHooksEnv(self):
6685
    """Build hooks env.
6686

6687
    This runs on master, primary and secondary nodes of the instance.
6688

6689
    """
6690
    instance = self._migrater.instance
6691
    source_node = instance.primary_node
6692
    target_node = self.op.target_node
6693
    env = _BuildInstanceHookEnvByObject(self, instance)
6694
    env.update({
6695
      "MIGRATE_LIVE": self._migrater.live,
6696
      "MIGRATE_CLEANUP": self.op.cleanup,
6697
      "OLD_PRIMARY": source_node,
6698
      "NEW_PRIMARY": target_node,
6699
      })
6700

    
6701
    if instance.disk_template in constants.DTS_INT_MIRROR:
6702
      env["OLD_SECONDARY"] = target_node
6703
      env["NEW_SECONDARY"] = source_node
6704
    else:
6705
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6706

    
6707
    return env
6708

    
6709
  def BuildHooksNodes(self):
6710
    """Build hooks nodes.
6711

6712
    """
6713
    instance = self._migrater.instance
6714
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6715
    return (nl, nl + [instance.primary_node])
6716

    
6717

    
6718
class LUInstanceMove(LogicalUnit):
6719
  """Move an instance by data-copying.
6720

6721
  """
6722
  HPATH = "instance-move"
6723
  HTYPE = constants.HTYPE_INSTANCE
6724
  REQ_BGL = False
6725

    
6726
  def ExpandNames(self):
6727
    self._ExpandAndLockInstance()
6728
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6729
    self.op.target_node = target_node
6730
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6731
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6732

    
6733
  def DeclareLocks(self, level):
6734
    if level == locking.LEVEL_NODE:
6735
      self._LockInstancesNodes(primary_only=True)
6736

    
6737
  def BuildHooksEnv(self):
6738
    """Build hooks env.
6739

6740
    This runs on master, primary and secondary nodes of the instance.
6741

6742
    """
6743
    env = {
6744
      "TARGET_NODE": self.op.target_node,
6745
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6746
      }
6747
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6748
    return env
6749

    
6750
  def BuildHooksNodes(self):
6751
    """Build hooks nodes.
6752

6753
    """
6754
    nl = [
6755
      self.cfg.GetMasterNode(),
6756
      self.instance.primary_node,
6757
      self.op.target_node,
6758
      ]
6759
    return (nl, nl)
6760

    
6761
  def CheckPrereq(self):
6762
    """Check prerequisites.
6763

6764
    This checks that the instance is in the cluster.
6765

6766
    """
6767
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6768
    assert self.instance is not None, \
6769
      "Cannot retrieve locked instance %s" % self.op.instance_name
6770

    
6771
    node = self.cfg.GetNodeInfo(self.op.target_node)
6772
    assert node is not None, \
6773
      "Cannot retrieve locked node %s" % self.op.target_node
6774

    
6775
    self.target_node = target_node = node.name
6776

    
6777
    if target_node == instance.primary_node:
6778
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6779
                                 (instance.name, target_node),
6780
                                 errors.ECODE_STATE)
6781

    
6782
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6783

    
6784
    for idx, dsk in enumerate(instance.disks):
6785
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6786
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6787
                                   " cannot copy" % idx, errors.ECODE_STATE)
6788

    
6789
    _CheckNodeOnline(self, target_node)
6790
    _CheckNodeNotDrained(self, target_node)
6791
    _CheckNodeVmCapable(self, target_node)
6792

    
6793
    if instance.admin_up:
6794
      # check memory requirements on the target node
6795
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6796
                           instance.name, bep[constants.BE_MEMORY],
6797
                           instance.hypervisor)
6798
    else:
6799
      self.LogInfo("Not checking memory on the secondary node as"
6800
                   " instance will not be started")
6801

    
6802
    # check bridge existence
6803
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6804

    
6805
  def Exec(self, feedback_fn):
6806
    """Move an instance.
6807

6808
    The move is done by shutting it down on its present node, copying
6809
    the data over (slow) and starting it on the new node.
6810

6811
    """
6812
    instance = self.instance
6813

    
6814
    source_node = instance.primary_node
6815
    target_node = self.target_node
6816

    
6817
    self.LogInfo("Shutting down instance %s on source node %s",
6818
                 instance.name, source_node)
6819

    
6820
    result = self.rpc.call_instance_shutdown(source_node, instance,
6821
                                             self.op.shutdown_timeout)
6822
    msg = result.fail_msg
6823
    if msg:
6824
      if self.op.ignore_consistency:
6825
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6826
                             " Proceeding anyway. Please make sure node"
6827
                             " %s is down. Error details: %s",
6828
                             instance.name, source_node, source_node, msg)
6829
      else:
6830
        raise errors.OpExecError("Could not shutdown instance %s on"
6831
                                 " node %s: %s" %
6832
                                 (instance.name, source_node, msg))
6833

    
6834
    # create the target disks
6835
    try:
6836
      _CreateDisks(self, instance, target_node=target_node)
6837
    except errors.OpExecError:
6838
      self.LogWarning("Device creation failed, reverting...")
6839
      try:
6840
        _RemoveDisks(self, instance, target_node=target_node)
6841
      finally:
6842
        self.cfg.ReleaseDRBDMinors(instance.name)
6843
        raise
6844

    
6845
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6846

    
6847
    errs = []
6848
    # activate, get path, copy the data over
6849
    for idx, disk in enumerate(instance.disks):
6850
      self.LogInfo("Copying data for disk %d", idx)
6851
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6852
                                               instance.name, True, idx)
6853
      if result.fail_msg:
6854
        self.LogWarning("Can't assemble newly created disk %d: %s",
6855
                        idx, result.fail_msg)
6856
        errs.append(result.fail_msg)
6857
        break
6858
      dev_path = result.payload
6859
      result = self.rpc.call_blockdev_export(source_node, disk,
6860
                                             target_node, dev_path,
6861
                                             cluster_name)
6862
      if result.fail_msg:
6863
        self.LogWarning("Can't copy data over for disk %d: %s",
6864
                        idx, result.fail_msg)
6865
        errs.append(result.fail_msg)
6866
        break
6867

    
6868
    if errs:
6869
      self.LogWarning("Some disks failed to copy, aborting")
6870
      try:
6871
        _RemoveDisks(self, instance, target_node=target_node)
6872
      finally:
6873
        self.cfg.ReleaseDRBDMinors(instance.name)
6874
        raise errors.OpExecError("Errors during disk copy: %s" %
6875
                                 (",".join(errs),))
6876

    
6877
    instance.primary_node = target_node
6878
    self.cfg.Update(instance, feedback_fn)
6879

    
6880
    self.LogInfo("Removing the disks on the original node")
6881
    _RemoveDisks(self, instance, target_node=source_node)
6882

    
6883
    # Only start the instance if it's marked as up
6884
    if instance.admin_up:
6885
      self.LogInfo("Starting instance %s on node %s",
6886
                   instance.name, target_node)
6887

    
6888
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6889
                                           ignore_secondaries=True)
6890
      if not disks_ok:
6891
        _ShutdownInstanceDisks(self, instance)
6892
        raise errors.OpExecError("Can't activate the instance's disks")
6893

    
6894
      result = self.rpc.call_instance_start(target_node, instance,
6895
                                            None, None, False)
6896
      msg = result.fail_msg
6897
      if msg:
6898
        _ShutdownInstanceDisks(self, instance)
6899
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6900
                                 (instance.name, target_node, msg))
6901

    
6902

    
6903
class LUNodeMigrate(LogicalUnit):
6904
  """Migrate all instances from a node.
6905

6906
  """
6907
  HPATH = "node-migrate"
6908
  HTYPE = constants.HTYPE_NODE
6909
  REQ_BGL = False
6910

    
6911
  def CheckArguments(self):
6912
    pass
6913

    
6914
  def ExpandNames(self):
6915
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6916

    
6917
    self.share_locks = _ShareAll()
6918
    self.needed_locks = {
6919
      locking.LEVEL_NODE: [self.op.node_name],
6920
      }
6921

    
6922
  def BuildHooksEnv(self):
6923
    """Build hooks env.
6924

6925
    This runs on the master, the primary and all the secondaries.
6926

6927
    """
6928
    return {
6929
      "NODE_NAME": self.op.node_name,
6930
      }
6931

    
6932
  def BuildHooksNodes(self):
6933
    """Build hooks nodes.
6934

6935
    """
6936
    nl = [self.cfg.GetMasterNode()]
6937
    return (nl, nl)
6938

    
6939
  def CheckPrereq(self):
6940
    pass
6941

    
6942
  def Exec(self, feedback_fn):
6943
    # Prepare jobs for migration instances
6944
    jobs = [
6945
      [opcodes.OpInstanceMigrate(instance_name=inst.name,
6946
                                 mode=self.op.mode,
6947
                                 live=self.op.live,
6948
                                 iallocator=self.op.iallocator,
6949
                                 target_node=self.op.target_node)]
6950
      for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6951
      ]
6952

    
6953
    # TODO: Run iallocator in this opcode and pass correct placement options to
6954
    # OpInstanceMigrate. Since other jobs can modify the cluster between
6955
    # running the iallocator and the actual migration, a good consistency model
6956
    # will have to be found.
6957

    
6958
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
6959
            frozenset([self.op.node_name]))
6960

    
6961
    return ResultWithJobs(jobs)
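
# Illustrative sketch (not part of the original module): ResultWithJobs takes a
# list of job definitions, each itself a list of opcodes, so evacuating two
# primary instances submits two single-opcode jobs (instance names below are
# hypothetical).
#
#   jobs = [
#     [opcodes.OpInstanceMigrate(instance_name="inst1.example.com")],
#     [opcodes.OpInstanceMigrate(instance_name="inst2.example.com")],
#     ]
#   return ResultWithJobs(jobs)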


class TLMigrateInstance(Tasklet):
6965
  """Tasklet class for instance migration.
6966

6967
  @type live: boolean
6968
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we are cleaning up from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
6984
  @type shutdown_timeout: int
6985
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
6986

6987
  """
6988
  def __init__(self, lu, instance_name, cleanup=False,
6989
               failover=False, fallback=False,
6990
               ignore_consistency=False,
6991
               shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6992
    """Initializes this class.
6993

6994
    """
6995
    Tasklet.__init__(self, lu)
6996

    
6997
    # Parameters
6998
    self.instance_name = instance_name
6999
    self.cleanup = cleanup
7000
    self.live = False # will be overridden later
7001
    self.failover = failover
7002
    self.fallback = fallback
7003
    self.ignore_consistency = ignore_consistency
7004
    self.shutdown_timeout = shutdown_timeout
7005

    
7006
  def CheckPrereq(self):
7007
    """Check prerequisites.
7008

7009
    This checks that the instance is in the cluster.
7010

7011
    """
7012
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
7013
    instance = self.cfg.GetInstanceInfo(instance_name)
7014
    assert instance is not None
7015
    self.instance = instance
7016

    
7017
    if (not self.cleanup and not instance.admin_up and not self.failover and
7018
        self.fallback):
7019
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
7020
                      " to failover")
7021
      self.failover = True
7022

    
7023
    if instance.disk_template not in constants.DTS_MIRRORED:
7024
      if self.failover:
7025
        text = "failovers"
7026
      else:
7027
        text = "migrations"
7028
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
7029
                                 " %s" % (instance.disk_template, text),
7030
                                 errors.ECODE_STATE)
7031

    
7032
    if instance.disk_template in constants.DTS_EXT_MIRROR:
7033
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
7034

    
7035
      if self.lu.op.iallocator:
7036
        self._RunAllocator()
7037
      else:
7038
        # We set set self.target_node as it is required by
7039
        # BuildHooksEnv
7040
        self.target_node = self.lu.op.target_node
7041

    
7042
      # self.target_node is already populated, either directly or by the
7043
      # iallocator run
7044
      target_node = self.target_node
7045
      if self.target_node == instance.primary_node:
7046
        raise errors.OpPrereqError("Cannot migrate instance %s"
7047
                                   " to its primary (%s)" %
7048
                                   (instance.name, instance.primary_node))
7049

    
7050
      if len(self.lu.tasklets) == 1:
7051
        # It is safe to release locks only when we're the only tasklet
7052
        # in the LU
7053
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
7054
                      keep=[instance.primary_node, self.target_node])
7055

    
7056
    else:
7057
      secondary_nodes = instance.secondary_nodes
7058
      if not secondary_nodes:
7059
        raise errors.ConfigurationError("No secondary node but using"
7060
                                        " %s disk template" %
7061
                                        instance.disk_template)
7062
      target_node = secondary_nodes[0]
7063
      if self.lu.op.iallocator or (self.lu.op.target_node and
7064
                                   self.lu.op.target_node != target_node):
7065
        if self.failover:
7066
          text = "failed over"
7067
        else:
7068
          text = "migrated"
7069
        raise errors.OpPrereqError("Instances with disk template %s cannot"
7070
                                   " be %s to arbitrary nodes"
7071
                                   " (neither an iallocator nor a target"
7072
                                   " node can be passed)" %
7073
                                   (instance.disk_template, text),
7074
                                   errors.ECODE_INVAL)
7075

    
7076
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
7077

    
7078
    # check memory requirements on the secondary node
7079
    if not self.failover or instance.admin_up:
7080
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7081
                           instance.name, i_be[constants.BE_MEMORY],
7082
                           instance.hypervisor)
7083
    else:
7084
      self.lu.LogInfo("Not checking memory on the secondary node as"
7085
                      " instance will not be started")
7086

    
7087
    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7089

    
7090
    if not self.cleanup:
7091
      _CheckNodeNotDrained(self.lu, target_node)
7092
      if not self.failover:
7093
        result = self.rpc.call_instance_migratable(instance.primary_node,
7094
                                                   instance)
7095
        if result.fail_msg and self.fallback:
7096
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7097
                          " failover")
7098
          self.failover = True
7099
        else:
7100
          result.Raise("Can't migrate, please use failover",
7101
                       prereq=True, ecode=errors.ECODE_STATE)
7102

    
7103
    assert not (self.failover and self.cleanup)
7104

    
7105
    if not self.failover:
7106
      if self.lu.op.live is not None and self.lu.op.mode is not None:
7107
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7108
                                   " parameters are accepted",
7109
                                   errors.ECODE_INVAL)
7110
      if self.lu.op.live is not None:
7111
        if self.lu.op.live:
7112
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
7113
        else:
7114
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7115
        # reset the 'live' parameter to None so that repeated
7116
        # invocations of CheckPrereq do not raise an exception
7117
        self.lu.op.live = None
7118
      elif self.lu.op.mode is None:
7119
        # read the default value from the hypervisor
7120
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7121
                                                skip_globals=False)
7122
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7123

    
7124
      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7125
    else:
7126
      # Failover is never live
7127
      self.live = False
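    # Illustrative summary (a sketch of the logic above, not used by the
    # code): the precedence implemented for the migration mode is
    #   live=True  -> mode = constants.HT_MIGRATION_LIVE
    #   live=False -> mode = constants.HT_MIGRATION_NONLIVE
    #   live=None and mode=None -> mode is taken from the hypervisor's
    #                              HV_MIGRATION_MODE default
    # and, unless this is a failover, self.live is then simply
    # (mode == constants.HT_MIGRATION_LIVE).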
7128

    
7129
  def _RunAllocator(self):
7130
    """Run the allocator based on input opcode.
7131

7132
    """
7133
    ial = IAllocator(self.cfg, self.rpc,
7134
                     mode=constants.IALLOCATOR_MODE_RELOC,
7135
                     name=self.instance_name,
7136
                     # TODO See why hail breaks with a single node below
7137
                     relocate_from=[self.instance.primary_node,
7138
                                    self.instance.primary_node],
7139
                     )
7140

    
7141
    ial.Run(self.lu.op.iallocator)
7142

    
7143
    if not ial.success:
7144
      raise errors.OpPrereqError("Can't compute nodes using"
7145
                                 " iallocator '%s': %s" %
7146
                                 (self.lu.op.iallocator, ial.info),
7147
                                 errors.ECODE_NORES)
7148
    if len(ial.result) != ial.required_nodes:
7149
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7150
                                 " of nodes (%s), required %s" %
7151
                                 (self.lu.op.iallocator, len(ial.result),
7152
                                  ial.required_nodes), errors.ECODE_FAULT)
7153
    self.target_node = ial.result[0]
7154
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                    self.instance_name, self.lu.op.iallocator,
                    utils.CommaJoin(ial.result))
7157

    
7158
  def _WaitUntilSync(self):
7159
    """Poll with custom rpc for disk sync.
7160

7161
    This uses our own step-based rpc call.
7162

7163
    """
7164
    self.feedback_fn("* wait until resync is done")
7165
    all_done = False
7166
    while not all_done:
7167
      all_done = True
7168
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7169
                                            self.nodes_ip,
7170
                                            self.instance.disks)
7171
      min_percent = 100
7172
      for node, nres in result.items():
7173
        nres.Raise("Cannot resync disks on node %s" % node)
7174
        node_done, node_percent = nres.payload
7175
        all_done = all_done and node_done
7176
        if node_percent is not None:
7177
          min_percent = min(min_percent, node_percent)
7178
      if not all_done:
7179
        if min_percent < 100:
7180
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
7181
        time.sleep(2)
7182

    
7183
  def _EnsureSecondary(self, node):
7184
    """Demote a node to secondary.
7185

7186
    """
7187
    self.feedback_fn("* switching node %s to secondary mode" % node)
7188

    
7189
    for dev in self.instance.disks:
7190
      self.cfg.SetDiskID(dev, node)
7191

    
7192
    result = self.rpc.call_blockdev_close(node, self.instance.name,
7193
                                          self.instance.disks)
7194
    result.Raise("Cannot change disk to secondary on node %s" % node)
7195

    
7196
  def _GoStandalone(self):
7197
    """Disconnect from the network.
7198

7199
    """
7200
    self.feedback_fn("* changing into standalone mode")
7201
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7202
                                               self.instance.disks)
7203
    for node, nres in result.items():
7204
      nres.Raise("Cannot disconnect disks node %s" % node)
7205

    
7206
  def _GoReconnect(self, multimaster):
7207
    """Reconnect to the network.
7208

7209
    """
7210
    if multimaster:
7211
      msg = "dual-master"
7212
    else:
7213
      msg = "single-master"
7214
    self.feedback_fn("* changing disks into %s mode" % msg)
7215
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7216
                                           self.instance.disks,
7217
                                           self.instance.name, multimaster)
7218
    for node, nres in result.items():
7219
      nres.Raise("Cannot change disks config on node %s" % node)
7220

    
7221
  def _ExecCleanup(self):
7222
    """Try to cleanup after a failed migration.
7223

7224
    The cleanup is done by:
7225
      - check that the instance is running only on one node
7226
        (and update the config if needed)
7227
      - change disks on its secondary node to secondary
7228
      - wait until disks are fully synchronized
7229
      - disconnect from the network
7230
      - change disks into single-master mode
7231
      - wait again until disks are fully synchronized
7232

7233
    """
7234
    instance = self.instance
7235
    target_node = self.target_node
7236
    source_node = self.source_node
7237

    
7238
    # check running on only one node
7239
    self.feedback_fn("* checking where the instance actually runs"
7240
                     " (if this hangs, the hypervisor might be in"
7241
                     " a bad state)")
7242
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7243
    for node, result in ins_l.items():
7244
      result.Raise("Can't contact node %s" % node)
7245

    
7246
    runningon_source = instance.name in ins_l[source_node].payload
7247
    runningon_target = instance.name in ins_l[target_node].payload
7248

    
7249
    if runningon_source and runningon_target:
7250
      raise errors.OpExecError("Instance seems to be running on two nodes,"
7251
                               " or the hypervisor is confused; you will have"
7252
                               " to ensure manually that it runs only on one"
7253
                               " and restart this operation")
7254

    
7255
    if not (runningon_source or runningon_target):
7256
      raise errors.OpExecError("Instance does not seem to be running at all;"
7257
                               " in this case it's safer to repair by"
7258
                               " running 'gnt-instance stop' to ensure disk"
7259
                               " shutdown, and then restarting it")
7260

    
7261
    if runningon_target:
7262
      # the migration has actually succeeded, we need to update the config
7263
      self.feedback_fn("* instance running on secondary node (%s),"
7264
                       " updating config" % target_node)
7265
      instance.primary_node = target_node
7266
      self.cfg.Update(instance, self.feedback_fn)
7267
      demoted_node = source_node
7268
    else:
7269
      self.feedback_fn("* instance confirmed to be running on its"
7270
                       " primary node (%s)" % source_node)
7271
      demoted_node = target_node
7272

    
7273
    if instance.disk_template in constants.DTS_INT_MIRROR:
7274
      self._EnsureSecondary(demoted_node)
7275
      try:
7276
        self._WaitUntilSync()
7277
      except errors.OpExecError:
7278
        # we ignore here errors, since if the device is standalone, it
7279
        # won't be able to sync
7280
        pass
7281
      self._GoStandalone()
7282
      self._GoReconnect(False)
7283
      self._WaitUntilSync()
7284

    
7285
    self.feedback_fn("* done")
7286

    
7287
  def _RevertDiskStatus(self):
7288
    """Try to revert the disk status after a failed migration.
7289

7290
    """
7291
    target_node = self.target_node
7292
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7293
      return
7294

    
7295
    try:
7296
      self._EnsureSecondary(target_node)
7297
      self._GoStandalone()
7298
      self._GoReconnect(False)
7299
      self._WaitUntilSync()
7300
    except errors.OpExecError, err:
7301
      self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7302
                         " please try to recover the instance manually;"
7303
                         " error '%s'" % str(err))
7304

    
7305
  def _AbortMigration(self):
7306
    """Call the hypervisor code to abort a started migration.
7307

7308
    """
7309
    instance = self.instance
7310
    target_node = self.target_node
7311
    migration_info = self.migration_info
7312

    
7313
    abort_result = self.rpc.call_finalize_migration(target_node,
7314
                                                    instance,
7315
                                                    migration_info,
7316
                                                    False)
7317
    abort_msg = abort_result.fail_msg
7318
    if abort_msg:
7319
      logging.error("Aborting migration failed on target node %s: %s",
7320
                    target_node, abort_msg)
7321
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.
7323

    
7324
  def _ExecMigration(self):
7325
    """Migrate an instance.
7326

7327
    The migrate is done by:
7328
      - change the disks into dual-master mode
7329
      - wait until disks are fully synchronized again
7330
      - migrate the instance
7331
      - change disks on the new secondary node (the old primary) to secondary
7332
      - wait until disks are fully synchronized
7333
      - change disks into single-master mode
7334

7335
    """
7336
    instance = self.instance
7337
    target_node = self.target_node
7338
    source_node = self.source_node
7339

    
7340
    self.feedback_fn("* checking disk consistency between source and target")
7341
    for dev in instance.disks:
7342
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7343
        raise errors.OpExecError("Disk %s is degraded or not fully"
7344
                                 " synchronized on target node,"
7345
                                 " aborting migration" % dev.iv_name)
7346

    
7347
    # First get the migration information from the remote node
7348
    result = self.rpc.call_migration_info(source_node, instance)
7349
    msg = result.fail_msg
7350
    if msg:
7351
      log_err = ("Failed fetching source migration information from %s: %s" %
7352
                 (source_node, msg))
7353
      logging.error(log_err)
7354
      raise errors.OpExecError(log_err)
7355

    
7356
    self.migration_info = migration_info = result.payload
7357

    
7358
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7359
      # Then switch the disks to master/master mode
7360
      self._EnsureSecondary(target_node)
7361
      self._GoStandalone()
7362
      self._GoReconnect(True)
7363
      self._WaitUntilSync()
7364

    
7365
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
7366
    result = self.rpc.call_accept_instance(target_node,
7367
                                           instance,
7368
                                           migration_info,
7369
                                           self.nodes_ip[target_node])
7370

    
7371
    msg = result.fail_msg
7372
    if msg:
7373
      logging.error("Instance pre-migration failed, trying to revert"
7374
                    " disk status: %s", msg)
7375
      self.feedback_fn("Pre-migration failed, aborting")
7376
      self._AbortMigration()
7377
      self._RevertDiskStatus()
7378
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7379
                               (instance.name, msg))
7380

    
7381
    self.feedback_fn("* migrating instance to %s" % target_node)
7382
    result = self.rpc.call_instance_migrate(source_node, instance,
7383
                                            self.nodes_ip[target_node],
7384
                                            self.live)
7385
    msg = result.fail_msg
7386
    if msg:
7387
      logging.error("Instance migration failed, trying to revert"
7388
                    " disk status: %s", msg)
7389
      self.feedback_fn("Migration failed, aborting")
7390
      self._AbortMigration()
7391
      self._RevertDiskStatus()
7392
      raise errors.OpExecError("Could not migrate instance %s: %s" %
7393
                               (instance.name, msg))
7394

    
7395
    instance.primary_node = target_node
7396
    # distribute new instance config to the other nodes
7397
    self.cfg.Update(instance, self.feedback_fn)
7398

    
7399
    result = self.rpc.call_finalize_migration(target_node,
7400
                                              instance,
7401
                                              migration_info,
7402
                                              True)
7403
    msg = result.fail_msg
7404
    if msg:
7405
      logging.error("Instance migration succeeded, but finalization failed:"
7406
                    " %s", msg)
7407
      raise errors.OpExecError("Could not finalize instance migration: %s" %
7408
                               msg)
7409

    
7410
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7411
      self._EnsureSecondary(source_node)
7412
      self._WaitUntilSync()
7413
      self._GoStandalone()
7414
      self._GoReconnect(False)
7415
      self._WaitUntilSync()
7416

    
7417
    self.feedback_fn("* done")
7418

    
7419
  def _ExecFailover(self):
7420
    """Failover an instance.
7421

7422
    The failover is done by shutting it down on its present node and
7423
    starting it on the secondary.
7424

7425
    """
7426
    instance = self.instance
7427
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7428

    
7429
    source_node = instance.primary_node
7430
    target_node = self.target_node
7431

    
7432
    if instance.admin_up:
7433
      self.feedback_fn("* checking disk consistency between source and target")
7434
      for dev in instance.disks:
7435
        # for drbd, these are drbd over lvm
7436
        if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7437
          if primary_node.offline:
7438
            self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7439
                             " target node %s" %
7440
                             (primary_node.name, dev.iv_name, target_node))
7441
          elif not self.ignore_consistency:
7442
            raise errors.OpExecError("Disk %s is degraded on target node,"
7443
                                     " aborting failover" % dev.iv_name)
7444
    else:
7445
      self.feedback_fn("* not checking disk consistency as instance is not"
7446
                       " running")
7447

    
7448
    self.feedback_fn("* shutting down instance on source node")
7449
    logging.info("Shutting down instance %s on node %s",
7450
                 instance.name, source_node)
7451

    
7452
    result = self.rpc.call_instance_shutdown(source_node, instance,
7453
                                             self.shutdown_timeout)
7454
    msg = result.fail_msg
7455
    if msg:
7456
      if self.ignore_consistency or primary_node.offline:
7457
        self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7458
                           " proceeding anyway; please make sure node"
7459
                           " %s is down; error details: %s",
7460
                           instance.name, source_node, source_node, msg)
7461
      else:
7462
        raise errors.OpExecError("Could not shutdown instance %s on"
7463
                                 " node %s: %s" %
7464
                                 (instance.name, source_node, msg))
7465

    
7466
    self.feedback_fn("* deactivating the instance's disks on source node")
7467
    if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7468
      raise errors.OpExecError("Can't shut down the instance's disks")
7469

    
7470
    instance.primary_node = target_node
7471
    # distribute new instance config to the other nodes
7472
    self.cfg.Update(instance, self.feedback_fn)
7473

    
7474
    # Only start the instance if it's marked as up
7475
    if instance.admin_up:
7476
      self.feedback_fn("* activating the instance's disks on target node %s" %
7477
                       target_node)
7478
      logging.info("Starting instance %s on node %s",
7479
                   instance.name, target_node)
7480

    
7481
      disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7482
                                           ignore_secondaries=True)
7483
      if not disks_ok:
7484
        _ShutdownInstanceDisks(self.lu, instance)
7485
        raise errors.OpExecError("Can't activate the instance's disks")
7486

    
7487
      self.feedback_fn("* starting the instance on the target node %s" %
7488
                       target_node)
7489
      result = self.rpc.call_instance_start(target_node, instance, None, None,
7490
                                            False)
7491
      msg = result.fail_msg
7492
      if msg:
7493
        _ShutdownInstanceDisks(self.lu, instance)
7494
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7495
                                 (instance.name, target_node, msg))
7496

    
7497
  def Exec(self, feedback_fn):
7498
    """Perform the migration.
7499

7500
    """
7501
    self.feedback_fn = feedback_fn
7502
    self.source_node = self.instance.primary_node
7503

    
7504
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7505
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
7506
      self.target_node = self.instance.secondary_nodes[0]
7507
      # Otherwise self.target_node has been populated either
7508
      # directly, or through an iallocator.
7509

    
7510
    self.all_nodes = [self.source_node, self.target_node]
7511
    self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7512
                         in self.cfg.GetMultiNodeInfo(self.all_nodes))
7513

    
7514
    if self.failover:
7515
      feedback_fn("Failover instance %s" % self.instance.name)
7516
      self._ExecFailover()
7517
    else:
7518
      feedback_fn("Migrating instance %s" % self.instance.name)
7519

    
7520
      if self.cleanup:
7521
        return self._ExecCleanup()
7522
      else:
7523
        return self._ExecMigration()
7524

    
7525

    
7526
def _CreateBlockDev(lu, node, instance, device, force_create,
7527
                    info, force_open):
7528
  """Create a tree of block devices on a given node.
7529

7530
  If this device type has to be created on secondaries, create it and
7531
  all its children.
7532

7533
  If not, just recurse to children keeping the same 'force' value.
7534

7535
  @param lu: the lu on whose behalf we execute
7536
  @param node: the node on which to create the device
7537
  @type instance: L{objects.Instance}
7538
  @param instance: the instance which owns the device
7539
  @type device: L{objects.Disk}
7540
  @param device: the device to create
7541
  @type force_create: boolean
7542
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device for which
      CreateOnSecondary() returns true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7552

7553
  """
7554
  if device.CreateOnSecondary():
7555
    force_create = True
7556

    
7557
  if device.children:
7558
    for child in device.children:
7559
      _CreateBlockDev(lu, node, instance, child, force_create,
7560
                      info, force_open)
7561

    
7562
  if not force_create:
7563
    return
7564

    
7565
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7566

    
7567

    
7568
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7569
  """Create a single block device on a given node.
7570

7571
  This will not recurse over children of the device, so they must be
7572
  created in advance.
7573

7574
  @param lu: the lu on whose behalf we execute
7575
  @param node: the node on which to create the device
7576
  @type instance: L{objects.Instance}
7577
  @param instance: the instance which owns the device
7578
  @type device: L{objects.Disk}
7579
  @param device: the device to create
7580
  @param info: the extra 'metadata' we should attach to the device
7581
      (this will be represented as a LVM tag)
7582
  @type force_open: boolean
7583
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
7587

7588
  """
7589
  lu.cfg.SetDiskID(device, node)
7590
  result = lu.rpc.call_blockdev_create(node, device, device.size,
7591
                                       instance.name, force_open, info)
7592
  result.Raise("Can't create block device %s on"
7593
               " node %s for instance %s" % (device, node, instance.name))
7594
  if device.physical_id is None:
7595
    device.physical_id = result.payload
7596

    
7597

    
7598
def _GenerateUniqueNames(lu, exts):
  """Generate suitable LV names.

  This will generate one logical volume name per given extension, each
  prefixed with a freshly generated unique ID.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results
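# Illustrative example, not executed anywhere: calling
# _GenerateUniqueNames(lu, [".disk0", ".disk1"]) returns something like
#   ["<unique-id-1>.disk0", "<unique-id-2>.disk1"]
# with one freshly generated unique ID per requested suffix.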
7609

    
7610

    
7611
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  assert len(vgnames) == len(names) == 2
  port = lu.cfg.AllocatePort()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgnames[0], names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgnames[1], names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
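# Illustrative sketch (hypothetical identifiers) of the tree built above:
# a DRBD8 disk of SIZE MiB sits on top of two LVs on each of the two nodes:
#   drbd8 (port, p_minor/s_minor, shared secret), size=SIZE
#     +- LV vgnames[0]/names[0]  (data, SIZE MiB)
#     +- LV vgnames[1]/names[1]  (metadata, 128 MiB)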
7630

    
7631

    
7632
def _GenerateDiskTemplate(lu, template_name,
7633
                          instance_name, primary_node,
7634
                          secondary_nodes, disk_info,
7635
                          file_storage_dir, file_driver,
7636
                          base_index, feedback_fn):
7637
  """Generate the entire disk layout for a given template type.
7638

7639
  """
7640
  #TODO: compute space requirements
7641

    
7642
  vgname = lu.cfg.GetVGName()
7643
  disk_count = len(disk_info)
7644
  disks = []
7645
  if template_name == constants.DT_DISKLESS:
7646
    pass
7647
  elif template_name == constants.DT_PLAIN:
7648
    if len(secondary_nodes) != 0:
7649
      raise errors.ProgrammerError("Wrong template configuration")
7650

    
7651
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7652
                                      for i in range(disk_count)])
7653
    for idx, disk in enumerate(disk_info):
7654
      disk_index = idx + base_index
7655
      vg = disk.get(constants.IDISK_VG, vgname)
7656
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7657
      disk_dev = objects.Disk(dev_type=constants.LD_LV,
7658
                              size=disk[constants.IDISK_SIZE],
7659
                              logical_id=(vg, names[idx]),
7660
                              iv_name="disk/%d" % disk_index,
7661
                              mode=disk[constants.IDISK_MODE])
7662
      disks.append(disk_dev)
7663
  elif template_name == constants.DT_DRBD8:
7664
    if len(secondary_nodes) != 1:
7665
      raise errors.ProgrammerError("Wrong template configuration")
7666
    remote_node = secondary_nodes[0]
7667
    minors = lu.cfg.AllocateDRBDMinor(
7668
      [primary_node, remote_node] * len(disk_info), instance_name)
7669

    
7670
    names = []
7671
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7672
                                               for i in range(disk_count)]):
7673
      names.append(lv_prefix + "_data")
7674
      names.append(lv_prefix + "_meta")
7675
    for idx, disk in enumerate(disk_info):
7676
      disk_index = idx + base_index
7677
      data_vg = disk.get(constants.IDISK_VG, vgname)
7678
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7679
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7680
                                      disk[constants.IDISK_SIZE],
7681
                                      [data_vg, meta_vg],
7682
                                      names[idx * 2:idx * 2 + 2],
7683
                                      "disk/%d" % disk_index,
7684
                                      minors[idx * 2], minors[idx * 2 + 1])
7685
      disk_dev.mode = disk[constants.IDISK_MODE]
7686
      disks.append(disk_dev)
7687
  elif template_name == constants.DT_FILE:
7688
    if len(secondary_nodes) != 0:
7689
      raise errors.ProgrammerError("Wrong template configuration")
7690

    
7691
    opcodes.RequireFileStorage()
7692

    
7693
    for idx, disk in enumerate(disk_info):
7694
      disk_index = idx + base_index
7695
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7696
                              size=disk[constants.IDISK_SIZE],
7697
                              iv_name="disk/%d" % disk_index,
7698
                              logical_id=(file_driver,
7699
                                          "%s/disk%d" % (file_storage_dir,
7700
                                                         disk_index)),
7701
                              mode=disk[constants.IDISK_MODE])
7702
      disks.append(disk_dev)
7703
  elif template_name == constants.DT_SHARED_FILE:
7704
    if len(secondary_nodes) != 0:
7705
      raise errors.ProgrammerError("Wrong template configuration")
7706

    
7707
    opcodes.RequireSharedFileStorage()
7708

    
7709
    for idx, disk in enumerate(disk_info):
7710
      disk_index = idx + base_index
7711
      disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7712
                              size=disk[constants.IDISK_SIZE],
7713
                              iv_name="disk/%d" % disk_index,
7714
                              logical_id=(file_driver,
7715
                                          "%s/disk%d" % (file_storage_dir,
7716
                                                         disk_index)),
7717
                              mode=disk[constants.IDISK_MODE])
7718
      disks.append(disk_dev)
7719
  elif template_name == constants.DT_BLOCK:
7720
    if len(secondary_nodes) != 0:
7721
      raise errors.ProgrammerError("Wrong template configuration")
7722

    
7723
    for idx, disk in enumerate(disk_info):
7724
      disk_index = idx + base_index
7725
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7726
                              size=disk[constants.IDISK_SIZE],
7727
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7728
                                          disk[constants.IDISK_ADOPT]),
7729
                              iv_name="disk/%d" % disk_index,
7730
                              mode=disk[constants.IDISK_MODE])
7731
      disks.append(disk_dev)
7732

    
7733
  else:
7734
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7735
  return disks
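# Illustrative example (hypothetical values), not executed anywhere: for
# template_name == constants.DT_PLAIN and a single disk of 1024 MiB, the
# result is one objects.Disk with dev_type=constants.LD_LV, size=1024,
# logical_id=("<vg>", "<unique-id>.disk0") and iv_name="disk/0".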
7736

    
7737

    
7738
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name
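# For example, an instance named "inst1.example.com" (a hypothetical name)
# would get the LVM tag "originstname+inst1.example.com".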
7743

    
7744

    
7745
def _CalcEta(time_taken, written, total_size):
  """Calculates the ETA based on size written and total size.

  @param time_taken: The time taken so far
  @param written: amount written so far
  @param total_size: The total size of data to be written
  @return: The remaining time in seconds

  """
  avg_time = time_taken / float(written)
  return (total_size - written) * avg_time
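# Illustrative arithmetic (hypothetical numbers), not used by the code:
# after 30 seconds with 512 MiB of a 2048 MiB disk written,
#   >>> _CalcEta(30.0, 512, 2048)
#   90.0
# i.e. (2048 - 512) * (30 / 512.0) = 90 seconds remaining.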
7756

    
7757

    
7758
def _WipeDisks(lu, instance):
7759
  """Wipes instance disks.
7760

7761
  @type lu: L{LogicalUnit}
7762
  @param lu: the logical unit on whose behalf we execute
7763
  @type instance: L{objects.Instance}
7764
  @param instance: the instance whose disks we should create
7765
  @return: the success of the wipe
7766

7767
  """
7768
  node = instance.primary_node
7769

    
7770
  for device in instance.disks:
7771
    lu.cfg.SetDiskID(device, node)
7772

    
7773
  logging.info("Pause sync of instance %s disks", instance.name)
7774
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7775

    
7776
  for idx, success in enumerate(result.payload):
7777
    if not success:
7778
      logging.warn("pause-sync of instance %s for disk %d failed",
                   instance.name, idx)
7780

    
7781
  try:
7782
    for idx, device in enumerate(instance.disks):
7783
      # The wipe chunk size is MIN_WIPE_CHUNK_PERCENT % of the instance disk,
      # but at most MAX_WIPE_CHUNK
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur
      wipe_chunk_size = int(wipe_chunk_size)
7790

    
7791
      lu.LogInfo("* Wiping disk %d", idx)
7792
      logging.info("Wiping disk %d for instance %s, node %s using"
7793
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7794

    
7795
      offset = 0
7796
      size = device.size
7797
      last_output = 0
7798
      start_time = time.time()
7799

    
7800
      while offset < size:
7801
        wipe_size = min(wipe_chunk_size, size - offset)
7802
        logging.debug("Wiping disk %d, offset %s, chunk %s",
7803
                      idx, offset, wipe_size)
7804
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7805
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
7806
                     (idx, offset, wipe_size))
7807
        now = time.time()
7808
        offset += wipe_size
7809
        if now - last_output >= 60:
7810
          eta = _CalcEta(now - start_time, offset, size)
7811
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
7812
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
7813
          last_output = now
7814
  finally:
7815
    logging.info("Resume sync of instance %s disks", instance.name)
7816

    
7817
    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7818

    
7819
    for idx, success in enumerate(result.payload):
7820
      if not success:
7821
        lu.LogWarning("Resume sync of disk %d failed, please have a"
7822
                      " look at the status and troubleshoot the issue", idx)
7823
        logging.warn("resume-sync of instance %s for disk %d failed",
                     instance.name, idx)
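# Illustrative chunk-size arithmetic for _WipeDisks (hypothetical constant
# values, not the real ones): with MIN_WIPE_CHUNK_PERCENT = 10 and
# MAX_WIPE_CHUNK = 2048 MiB, a 10240 MiB disk is wiped in chunks of
# min(2048, 10240 / 100.0 * 10) = 1024 MiB, i.e. ten blockdev_wipe calls.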
7825

    
7826

    
7827
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7828
  """Create all disks for an instance.
7829

7830
  This abstracts away some work from AddInstance.
7831

7832
  @type lu: L{LogicalUnit}
7833
  @param lu: the logical unit on whose behalf we execute
7834
  @type instance: L{objects.Instance}
7835
  @param instance: the instance whose disks we should create
7836
  @type to_skip: list
7837
  @param to_skip: list of indices to skip
7838
  @type target_node: string
7839
  @param target_node: if passed, overrides the target node for creation
7840
  @rtype: boolean
7841
  @return: the success of the creation
7842

7843
  """
7844
  info = _GetInstanceInfoText(instance)
7845
  if target_node is None:
7846
    pnode = instance.primary_node
7847
    all_nodes = instance.all_nodes
7848
  else:
7849
    pnode = target_node
7850
    all_nodes = [pnode]
7851

    
7852
  if instance.disk_template in constants.DTS_FILEBASED:
7853
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7854
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7855

    
7856
    result.Raise("Failed to create directory '%s' on"
7857
                 " node %s" % (file_storage_dir, pnode))
7858

    
7859
  # Note: this needs to be kept in sync with adding of disks in
7860
  # LUInstanceSetParams
7861
  for idx, device in enumerate(instance.disks):
7862
    if to_skip and idx in to_skip:
7863
      continue
7864
    logging.info("Creating volume %s for instance %s",
7865
                 device.iv_name, instance.name)
7866
    #HARDCODE
7867
    for node in all_nodes:
7868
      f_create = node == pnode
7869
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7870

    
7871

    
7872
def _RemoveDisks(lu, instance, target_node=None):
7873
  """Remove all disks for an instance.
7874

7875
  This abstracts away some work from `AddInstance()` and
7876
  `RemoveInstance()`. Note that in case some of the devices couldn't
7877
  be removed, the removal will continue with the other ones (compare
7878
  with `_CreateDisks()`).
7879

7880
  @type lu: L{LogicalUnit}
7881
  @param lu: the logical unit on whose behalf we execute
7882
  @type instance: L{objects.Instance}
7883
  @param instance: the instance whose disks we should remove
7884
  @type target_node: string
7885
  @param target_node: used to override the node on which to remove the disks
7886
  @rtype: boolean
7887
  @return: the success of the removal
7888

7889
  """
7890
  logging.info("Removing block devices for instance %s", instance.name)
7891

    
7892
  all_result = True
7893
  for device in instance.disks:
7894
    if target_node:
7895
      edata = [(target_node, device)]
7896
    else:
7897
      edata = device.ComputeNodeTree(instance.primary_node)
7898
    for node, disk in edata:
7899
      lu.cfg.SetDiskID(disk, node)
7900
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7901
      if msg:
7902
        lu.LogWarning("Could not remove block device %s on node %s,"
7903
                      " continuing anyway: %s", device.iv_name, node, msg)
7904
        all_result = False
7905

    
7906
  if instance.disk_template == constants.DT_FILE:
7907
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7908
    if target_node:
7909
      tgt = target_node
7910
    else:
7911
      tgt = instance.primary_node
7912
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7913
    if result.fail_msg:
7914
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7915
                    file_storage_dir, instance.primary_node, result.fail_msg)
7916
      all_result = False
7917

    
7918
  return all_result
7919

    
7920

    
7921
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  """
  def _compute(disks, payload):
    """Universal algorithm.

    """
    vgs = {}
    for disk in disks:
      vgs[disk[constants.IDISK_VG]] = \
        vgs.get(disk[constants.IDISK_VG], 0) + disk[constants.IDISK_SIZE] + \
        payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
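# Illustrative example (hypothetical VG names), not executed anywhere:
# assuming per-VG sizes accumulate as intended, two plain 1024 MiB disks in
# VG "xenvg" and one 512 MiB disk in VG "other" yield
#   >>> _ComputeDiskSizePerVG(constants.DT_PLAIN,
#   ...                       [{constants.IDISK_VG: "xenvg",
#   ...                         constants.IDISK_SIZE: 1024},
#   ...                        [{constants.IDISK_VG: "xenvg",
#   ...                         constants.IDISK_SIZE: 1024},
#   ...                        {constants.IDISK_VG: "other",
#   ...                         constants.IDISK_SIZE: 512}])
#   {'xenvg': 2048, 'other': 512}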
7951

    
7952

    
7953
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size requirement for a disk template.

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
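# Illustrative example (hypothetical sizes), not executed anywhere: for a
# DRBD8 instance with disks of 1024 and 2048 MiB,
#   >>> _ComputeDiskSize(constants.DT_DRBD8, [{constants.IDISK_SIZE: 1024},
#   ...                                       {constants.IDISK_SIZE: 2048}])
#   3328
# i.e. (1024 + 128) + (2048 + 128), counting 128 MiB of DRBD metadata per
# disk.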
7973

    
7974

    
7975
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  non_vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in non_vm_nodes]
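# For example (hypothetical node names), if "node3" is flagged as not
# vm_capable, _FilterVmNodes(lu, ["node1", "node2", "node3"]) returns
# ["node1", "node2"].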
7988

    
7989

    
7990
def _CheckHVParams(lu, nodenames, hvname, hvparams):
7991
  """Hypervisor parameter validation.
7992

7993
  This function abstract the hypervisor parameter validation to be
7994
  used in both instance create and instance modify.
7995

7996
  @type lu: L{LogicalUnit}
7997
  @param lu: the logical unit for which we check
7998
  @type nodenames: list
7999
  @param nodenames: the list of nodes on which we should check
8000
  @type hvname: string
8001
  @param hvname: the name of the hypervisor we should use
8002
  @type hvparams: dict
8003
  @param hvparams: the parameters which we need to check
8004
  @raise errors.OpPrereqError: if the parameters are not valid
8005

8006
  """
8007
  nodenames = _FilterVmNodes(lu, nodenames)
8008
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
8009
                                                  hvname,
8010
                                                  hvparams)
8011
  for node in nodenames:
8012
    info = hvinfo[node]
8013
    if info.offline:
8014
      continue
8015
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
8016

    
8017

    
8018
def _CheckOSParams(lu, required, nodenames, osname, osparams):
8019
  """OS parameters validation.
8020

8021
  @type lu: L{LogicalUnit}
8022
  @param lu: the logical unit for which we check
8023
  @type required: boolean
8024
  @param required: whether the validation should fail if the OS is not
8025
      found
8026
  @type nodenames: list
8027
  @param nodenames: the list of nodes on which we should check
8028
  @type osname: string
8029
  @param osname: the name of the OS we should use
8030
  @type osparams: dict
8031
  @param osparams: the parameters which we need to check
8032
  @raise errors.OpPrereqError: if the parameters are not valid
8033

8034
  """
8035
  nodenames = _FilterVmNodes(lu, nodenames)
8036
  result = lu.rpc.call_os_validate(required, nodenames, osname,
8037
                                   [constants.OS_VALIDATE_PARAMETERS],
8038
                                   osparams)
8039
  for node, nres in result.items():
8040
    # we don't check for offline cases since this should be run only
8041
    # against the master node and/or an instance's nodes
8042
    nres.Raise("OS Parameters validation failed on node %s" % node)
8043
    if not nres.payload:
8044
      lu.LogInfo("OS %s not found on node %s, validation skipped",
8045
                 osname, node)
8046

    
8047

    
8048
class LUInstanceCreate(LogicalUnit):
8049
  """Create an instance.
8050

8051
  """
8052
  HPATH = "instance-add"
8053
  HTYPE = constants.HTYPE_INSTANCE
8054
  REQ_BGL = False
8055

    
8056
  def CheckArguments(self):
8057
    """Check arguments.
8058

8059
    """
8060
    # do not require name_check to ease forward/backward compatibility
8061
    # for tools
8062
    if self.op.no_install and self.op.start:
8063
      self.LogInfo("No-installation mode selected, disabling startup")
8064
      self.op.start = False
8065
    # validate/normalize the instance name
8066
    self.op.instance_name = \
8067
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
8068

    
8069
    if self.op.ip_check and not self.op.name_check:
8070
      # TODO: make the ip check more flexible and not depend on the name check
8071
      raise errors.OpPrereqError("Cannot do IP address check without a name"
8072
                                 " check", errors.ECODE_INVAL)
8073

    
8074
    # check nics' parameter names
8075
    for nic in self.op.nics:
8076
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8077

    
8078
    # check disks. parameter names and consistent adopt/no-adopt strategy
8079
    has_adopt = has_no_adopt = False
8080
    for disk in self.op.disks:
8081
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8082
      if constants.IDISK_ADOPT in disk:
8083
        has_adopt = True
8084
      else:
8085
        has_no_adopt = True
8086
    if has_adopt and has_no_adopt:
8087
      raise errors.OpPrereqError("Either all disks are adopted or none is",
8088
                                 errors.ECODE_INVAL)
8089
    if has_adopt:
8090
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8091
        raise errors.OpPrereqError("Disk adoption is not supported for the"
8092
                                   " '%s' disk template" %
8093
                                   self.op.disk_template,
8094
                                   errors.ECODE_INVAL)
8095
      if self.op.iallocator is not None:
8096
        raise errors.OpPrereqError("Disk adoption not allowed with an"
8097
                                   " iallocator script", errors.ECODE_INVAL)
8098
      if self.op.mode == constants.INSTANCE_IMPORT:
8099
        raise errors.OpPrereqError("Disk adoption not allowed for"
8100
                                   " instance import", errors.ECODE_INVAL)
8101
    else:
8102
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
8103
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8104
                                   " but no 'adopt' parameter given" %
8105
                                   self.op.disk_template,
8106
                                   errors.ECODE_INVAL)
8107

    
8108
    self.adopt_disks = has_adopt
8109

    
8110
    # instance name verification
8111
    if self.op.name_check:
8112
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8113
      self.op.instance_name = self.hostname1.name
8114
      # used in CheckPrereq for ip ping check
8115
      self.check_ip = self.hostname1.ip
8116
    else:
8117
      self.check_ip = None
8118

    
8119
    # file storage checks
8120
    if (self.op.file_driver and
8121
        not self.op.file_driver in constants.FILE_DRIVER):
8122
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
8123
                                 self.op.file_driver, errors.ECODE_INVAL)
8124

    
8125
    if self.op.disk_template == constants.DT_FILE:
8126
      opcodes.RequireFileStorage()
8127
    elif self.op.disk_template == constants.DT_SHARED_FILE:
8128
      opcodes.RequireSharedFileStorage()
8129

    
8130
    ### Node/iallocator related checks
8131
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8132

    
8133
    if self.op.pnode is not None:
8134
      if self.op.disk_template in constants.DTS_INT_MIRROR:
8135
        if self.op.snode is None:
8136
          raise errors.OpPrereqError("The networked disk templates need"
8137
                                     " a mirror node", errors.ECODE_INVAL)
8138
      elif self.op.snode:
8139
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8140
                        " template")
8141
        self.op.snode = None
8142

    
8143
    self._cds = _GetClusterDomainSecret()
8144

    
8145
    if self.op.mode == constants.INSTANCE_IMPORT:
8146
      # On import force_variant must be True, because if we forced it at
8147
      # initial install, our only chance when importing it back is that it
8148
      # works again!
8149
      self.op.force_variant = True
8150

    
8151
      if self.op.no_install:
8152
        self.LogInfo("No-installation mode has no effect during import")
8153

    
8154
    elif self.op.mode == constants.INSTANCE_CREATE:
8155
      if self.op.os_type is None:
8156
        raise errors.OpPrereqError("No guest OS specified",
8157
                                   errors.ECODE_INVAL)
8158
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8159
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8160
                                   " installation" % self.op.os_type,
8161
                                   errors.ECODE_STATE)
8162
      if self.op.disk_template is None:
8163
        raise errors.OpPrereqError("No disk template specified",
8164
                                   errors.ECODE_INVAL)
8165

    
8166
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8167
      # Check handshake to ensure both clusters have the same domain secret
8168
      src_handshake = self.op.source_handshake
8169
      if not src_handshake:
8170
        raise errors.OpPrereqError("Missing source handshake",
8171
                                   errors.ECODE_INVAL)
8172

    
8173
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8174
                                                           src_handshake)
8175
      if errmsg:
8176
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8177
                                   errors.ECODE_INVAL)
8178

    
8179
      # Load and check source CA
8180
      self.source_x509_ca_pem = self.op.source_x509_ca
8181
      if not self.source_x509_ca_pem:
8182
        raise errors.OpPrereqError("Missing source X509 CA",
8183
                                   errors.ECODE_INVAL)
8184

    
8185
      try:
8186
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8187
                                                    self._cds)
8188
      except OpenSSL.crypto.Error, err:
8189
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8190
                                   (err, ), errors.ECODE_INVAL)
8191

    
8192
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8193
      if errcode is not None:
8194
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8195
                                   errors.ECODE_INVAL)
8196

    
8197
      self.source_x509_ca = cert
8198

    
8199
      src_instance_name = self.op.source_instance_name
8200
      if not src_instance_name:
8201
        raise errors.OpPrereqError("Missing source instance name",
8202
                                   errors.ECODE_INVAL)
8203

    
8204
      self.source_instance_name = \
8205
          netutils.GetHostname(name=src_instance_name).name
8206

    
8207
    else:
8208
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
8209
                                 self.op.mode, errors.ECODE_INVAL)
8210

    
8211
  def ExpandNames(self):
8212
    """ExpandNames for CreateInstance.
8213

8214
    Figure out the right locks for instance creation.
8215

8216
    """
8217
    self.needed_locks = {}
8218

    
8219
    instance_name = self.op.instance_name
8220
    # this is just a preventive check, but someone might still add this
8221
    # instance in the meantime, and creation will fail at lock-add time
8222
    if instance_name in self.cfg.GetInstanceList():
8223
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8224
                                 instance_name, errors.ECODE_EXISTS)
8225

    
8226
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8227

    
8228
    if self.op.iallocator:
8229
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8230
    else:
8231
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8232
      nodelist = [self.op.pnode]
8233
      if self.op.snode is not None:
8234
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8235
        nodelist.append(self.op.snode)
8236
      self.needed_locks[locking.LEVEL_NODE] = nodelist
8237

    
8238
    # in case of import lock the source node too
8239
    if self.op.mode == constants.INSTANCE_IMPORT:
8240
      src_node = self.op.src_node
8241
      src_path = self.op.src_path
8242

    
8243
      if src_path is None:
8244
        self.op.src_path = src_path = self.op.instance_name
8245

    
8246
      if src_node is None:
8247
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8248
        self.op.src_node = None
8249
        if os.path.isabs(src_path):
8250
          raise errors.OpPrereqError("Importing an instance from an absolute"
8251
                                     " path requires a source node option",
8252
                                     errors.ECODE_INVAL)
8253
      else:
8254
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8255
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8256
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
8257
        if not os.path.isabs(src_path):
8258
          self.op.src_path = src_path = \
8259
            utils.PathJoin(constants.EXPORT_DIR, src_path)
8260

    
8261
  def _RunAllocator(self):
8262
    """Run the allocator based on input opcode.
8263

8264
    """
8265
    nics = [n.ToDict() for n in self.nics]
8266
    ial = IAllocator(self.cfg, self.rpc,
8267
                     mode=constants.IALLOCATOR_MODE_ALLOC,
8268
                     name=self.op.instance_name,
8269
                     disk_template=self.op.disk_template,
8270
                     tags=self.op.tags,
8271
                     os=self.op.os_type,
8272
                     vcpus=self.be_full[constants.BE_VCPUS],
8273
                     memory=self.be_full[constants.BE_MEMORY],
8274
                     disks=self.disks,
8275
                     nics=nics,
8276
                     hypervisor=self.op.hypervisor,
8277
                     )
8278

    
8279
    ial.Run(self.op.iallocator)
8280

    
8281
    if not ial.success:
8282
      raise errors.OpPrereqError("Can't compute nodes using"
8283
                                 " iallocator '%s': %s" %
8284
                                 (self.op.iallocator, ial.info),
8285
                                 errors.ECODE_NORES)
8286
    if len(ial.result) != ial.required_nodes:
8287
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8288
                                 " of nodes (%s), required %s" %
8289
                                 (self.op.iallocator, len(ial.result),
8290
                                  ial.required_nodes), errors.ECODE_FAULT)
8291
    self.op.pnode = ial.result[0]
8292
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8293
                 self.op.instance_name, self.op.iallocator,
8294
                 utils.CommaJoin(ial.result))
8295
    if ial.required_nodes == 2:
8296
      self.op.snode = ial.result[1]
8297

    
8298
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
             for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
      tags=self.op.tags,
    ))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
    return nl, nl

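  # Illustrative sketch (not part of the LU): for an import, the dictionary
  # returned by BuildHooksEnv() starts out roughly as below, before
  # _BuildInstanceHookEnv() adds the generic instance keys.  The values are
  # invented for the example.
  #
  #   env = {
  #     "ADD_MODE": "import",
  #     "SRC_NODE": "node1.example.com",
  #     "SRC_PATH": "/srv/ganeti/export/inst1.example.com",
  #     "SRC_IMAGES": ["/srv/ganeti/export/inst1.example.com/disk0.dump"],
  #   }
  #
  # Hook scripts on the nodes returned by BuildHooksNodes() see these entries
  # (with the hooks runner's own prefix) in their environment.
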
  def _ReadExportInfo(self):
    """Reads the export information from disk.

    It will override the opcode source node and path with the actual
    information, if these two were not specified before.

    @return: the export information

    """
    assert self.op.mode == constants.INSTANCE_IMPORT

    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.owned_locks(locking.LEVEL_NODE)
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                       src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                   src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, "version")
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)
    return export_info

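  # Illustrative sketch (not part of the LU): the export data parsed above is
  # a ConfigParser-style file.  The option names below are the ones this
  # module reads; the concrete values and the exact section titles (the
  # INISECT_EXP/INISECT_INS constants) are assumed for illustration.
  #
  #   [export]
  #   version = 0
  #
  #   [instance]
  #   name = inst1.example.com
  #   disk_template = plain
  #   disk_count = 1
  #   disk0_size = 10240
  #   disk0_dump = disk0.dump
  #   nic_count = 1
  #   nic0_mac = aa:00:00:11:22:33
  #
  # _ReadExportParams() below only consults these options for parameters that
  # the opcode left unset.
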
  def _ReadExportParams(self, einfo):
    """Use export parameters as defaults.

    In case the opcode doesn't specify (as in override) some instance
    parameters, then try to use them from the export information, if
    that declares them.

    """
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")

    if self.op.disk_template is None:
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
        self.op.disk_template = einfo.get(constants.INISECT_INS,
                                          "disk_template")
      else:
        raise errors.OpPrereqError("No disk template specified and the export"
                                   " is missing the disk_template information",
                                   errors.ECODE_INVAL)

    if not self.op.disks:
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
        disks = []
        # TODO: import the disk iv_name too
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
          disks.append({constants.IDISK_SIZE: disk_sz})
        self.op.disks = disks
      else:
        raise errors.OpPrereqError("No disk info specified and the export"
                                   " is missing the disk information",
                                   errors.ECODE_INVAL)

    if (not self.op.nics and
        einfo.has_option(constants.INISECT_INS, "nic_count")):
      nics = []
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
        ndict = {}
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
          ndict[name] = v
        nics.append(ndict)
      self.op.nics = nics

    if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
      self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()

    if (self.op.hypervisor is None and
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")

    if einfo.has_section(constants.INISECT_HYP):
      # use the export parameters but do not override the ones
      # specified by the user
      for name, value in einfo.items(constants.INISECT_HYP):
        if name not in self.op.hvparams:
          self.op.hvparams[name] = value

    if einfo.has_section(constants.INISECT_BEP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_BEP):
        if name not in self.op.beparams:
          self.op.beparams[name] = value
    else:
      # try to read the parameters old style, from the main section
      for name in constants.BES_PARAMETERS:
        if (name not in self.op.beparams and
            einfo.has_option(constants.INISECT_INS, name)):
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)

    if einfo.has_section(constants.INISECT_OSP):
      # use the parameters, without overriding
      for name, value in einfo.items(constants.INISECT_OSP):
        if name not in self.op.osparams:
          self.op.osparams[name] = value

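  # Illustrative sketch (not part of the LU): every block above follows the
  # same "fill but never override" rule, which boils down to:
  #
  #   def _FillUnset(target, source):
  #     """Copy items from source into target without overriding."""
  #     for name, value in source.items():
  #       if name not in target:
  #         target[name] = value
  #     return target
  #
  #   # e.g. _FillUnset(self.op.hvparams, dict(einfo.items(constants.INISECT_HYP)))
  #   # keeps any value the user passed explicitly and only adopts the
  #   # exported value for parameters that were left unspecified.
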
  def _RevertToDefaults(self, cluster):
    """Revert the instance parameters to the default values.

    """
    # hvparams
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
    for name in self.op.hvparams.keys():
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
        del self.op.hvparams[name]
    # beparams
    be_defs = cluster.SimpleFillBE({})
    for name in self.op.beparams.keys():
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
        del self.op.beparams[name]
    # nic params
    nic_defs = cluster.SimpleFillNIC({})
    for nic in self.op.nics:
      for name in constants.NICS_PARAMETERS:
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
          del nic[name]
    # osparams
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
    for name in self.op.osparams.keys():
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
        del self.op.osparams[name]

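  # Illustrative sketch (not part of the LU): _RevertToDefaults() is the
  # inverse of the export merge above -- it drops every parameter whose value
  # is identical to the cluster default, so only genuine overrides end up
  # stored with the instance:
  #
  #   def _StripDefaults(params, defaults):
  #     """Return params without entries equal to their default value."""
  #     return dict((name, value) for name, value in params.items()
  #                 if name not in defaults or defaults[name] != value)
  #
  # This is what the identify_defaults opcode parameter relies on when
  # importing an instance whose export carries fully expanded parameter
  # dictionaries.
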
  def _CalculateFileStorageDir(self):
    """Calculate final instance file storage dir.

    """
    # file storage dir calculation/check
    self.instance_file_storage_dir = None
    if self.op.disk_template in constants.DTS_FILEBASED:
      # build the full file storage dir path
      joinargs = []

      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      cfg_storagedir = get_fsd_fn()
      if not cfg_storagedir:
        raise errors.OpPrereqError("Cluster file storage dir not defined")
      joinargs.append(cfg_storagedir)

      if self.op.file_storage_dir is not None:
        joinargs.append(self.op.file_storage_dir)

      joinargs.append(self.op.instance_name)

      # pylint: disable-msg=W0142
      self.instance_file_storage_dir = utils.PathJoin(*joinargs)

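  # Illustrative sketch (not part of the LU): for a file-based template the
  # final directory is a join of up to three components.  The cluster storage
  # dir below is an assumed example value.
  #
  #   # cfg_storagedir               op.file_storage_dir  op.instance_name
  #   # "/srv/ganeti/file-storage" + "web"              + "inst1.example.com"
  #   #   -> "/srv/ganeti/file-storage/web/inst1.example.com"
  #
  # utils.PathJoin() is used instead of os.path.join so that a malformed
  # user-supplied component is rejected rather than silently producing a path
  # outside the configured storage root.
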
  def CheckPrereq(self):
8516
    """Check prerequisites.
8517

8518
    """
8519
    self._CalculateFileStorageDir()
8520

    
8521
    if self.op.mode == constants.INSTANCE_IMPORT:
8522
      export_info = self._ReadExportInfo()
8523
      self._ReadExportParams(export_info)
8524

    
8525
    if (not self.cfg.GetVGName() and
8526
        self.op.disk_template not in constants.DTS_NOT_LVM):
8527
      raise errors.OpPrereqError("Cluster does not support lvm-based"
8528
                                 " instances", errors.ECODE_STATE)
8529

    
8530
    if self.op.hypervisor is None:
8531
      self.op.hypervisor = self.cfg.GetHypervisorType()
8532

    
8533
    cluster = self.cfg.GetClusterInfo()
8534
    enabled_hvs = cluster.enabled_hypervisors
8535
    if self.op.hypervisor not in enabled_hvs:
8536
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8537
                                 " cluster (%s)" % (self.op.hypervisor,
8538
                                  ",".join(enabled_hvs)),
8539
                                 errors.ECODE_STATE)
8540

    
8541
    # Check tag validity
8542
    for tag in self.op.tags:
8543
      objects.TaggableObject.ValidateTag(tag)
8544

    
8545
    # check hypervisor parameter syntax (locally)
8546
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8547
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8548
                                      self.op.hvparams)
8549
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8550
    hv_type.CheckParameterSyntax(filled_hvp)
8551
    self.hv_full = filled_hvp
8552
    # check that we don't specify global parameters on an instance
8553
    _CheckGlobalHvParams(self.op.hvparams)
8554

    
8555
    # fill and remember the beparams dict
8556
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8557
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
8558

    
8559
    # build os parameters
8560
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8561

    
8562
    # now that hvp/bep are in final format, let's reset to defaults,
8563
    # if told to do so
8564
    if self.op.identify_defaults:
8565
      self._RevertToDefaults(cluster)
8566

    
8567
    # NIC buildup
8568
    self.nics = []
8569
    for idx, nic in enumerate(self.op.nics):
8570
      nic_mode_req = nic.get(constants.INIC_MODE, None)
8571
      nic_mode = nic_mode_req
8572
      if nic_mode is None:
8573
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8574

    
8575
      # in routed mode, for the first nic, the default ip is 'auto'
8576
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8577
        default_ip_mode = constants.VALUE_AUTO
8578
      else:
8579
        default_ip_mode = constants.VALUE_NONE
8580

    
8581
      # ip validity checks
8582
      ip = nic.get(constants.INIC_IP, default_ip_mode)
8583
      if ip is None or ip.lower() == constants.VALUE_NONE:
8584
        nic_ip = None
8585
      elif ip.lower() == constants.VALUE_AUTO:
8586
        if not self.op.name_check:
8587
          raise errors.OpPrereqError("IP address set to auto but name checks"
8588
                                     " have been skipped",
8589
                                     errors.ECODE_INVAL)
8590
        nic_ip = self.hostname1.ip
8591
      else:
8592
        if not netutils.IPAddress.IsValid(ip):
8593
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8594
                                     errors.ECODE_INVAL)
8595
        nic_ip = ip
8596

    
8597
      # TODO: check the ip address for uniqueness
8598
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8599
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
8600
                                   errors.ECODE_INVAL)
8601

    
8602
      # MAC address verification
8603
      mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8604
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8605
        mac = utils.NormalizeAndValidateMac(mac)
8606

    
8607
        try:
8608
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
8609
        except errors.ReservationError:
8610
          raise errors.OpPrereqError("MAC address %s already in use"
8611
                                     " in cluster" % mac,
8612
                                     errors.ECODE_NOTUNIQUE)
8613

    
8614
      #  Build nic parameters
8615
      link = nic.get(constants.INIC_LINK, None)
8616
      nicparams = {}
8617
      if nic_mode_req:
8618
        nicparams[constants.NIC_MODE] = nic_mode_req
8619
      if link:
8620
        nicparams[constants.NIC_LINK] = link
8621

    
8622
      check_params = cluster.SimpleFillNIC(nicparams)
8623
      objects.NIC.CheckParameterSyntax(check_params)
8624
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8625

    
8626
    # disk checks/pre-build
8627
    default_vg = self.cfg.GetVGName()
8628
    self.disks = []
8629
    for disk in self.op.disks:
8630
      mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8631
      if mode not in constants.DISK_ACCESS_SET:
8632
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8633
                                   mode, errors.ECODE_INVAL)
8634
      size = disk.get(constants.IDISK_SIZE, None)
8635
      if size is None:
8636
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8637
      try:
8638
        size = int(size)
8639
      except (TypeError, ValueError):
8640
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8641
                                   errors.ECODE_INVAL)
8642

    
8643
      data_vg = disk.get(constants.IDISK_VG, default_vg)
8644
      new_disk = {
8645
        constants.IDISK_SIZE: size,
8646
        constants.IDISK_MODE: mode,
8647
        constants.IDISK_VG: data_vg,
8648
        constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8649
        }
8650
      if constants.IDISK_ADOPT in disk:
8651
        new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8652
      self.disks.append(new_disk)
8653

    
8654
    if self.op.mode == constants.INSTANCE_IMPORT:
8655

    
8656
      # Check that the new instance doesn't have less disks than the export
8657
      instance_disks = len(self.disks)
8658
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8659
      if instance_disks < export_disks:
8660
        raise errors.OpPrereqError("Not enough disks to import."
8661
                                   " (instance: %d, export: %d)" %
8662
                                   (instance_disks, export_disks),
8663
                                   errors.ECODE_INVAL)
8664

    
8665
      disk_images = []
8666
      for idx in range(export_disks):
8667
        option = "disk%d_dump" % idx
8668
        if export_info.has_option(constants.INISECT_INS, option):
8669
          # FIXME: are the old os-es, disk sizes, etc. useful?
8670
          export_name = export_info.get(constants.INISECT_INS, option)
8671
          image = utils.PathJoin(self.op.src_path, export_name)
8672
          disk_images.append(image)
8673
        else:
8674
          disk_images.append(False)
8675

    
8676
      self.src_images = disk_images
8677

    
8678
      old_name = export_info.get(constants.INISECT_INS, "name")
8679
      try:
8680
        exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8681
      except (TypeError, ValueError), err:
8682
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
8683
                                   " an integer: %s" % str(err),
8684
                                   errors.ECODE_STATE)
8685
      if self.op.instance_name == old_name:
8686
        for idx, nic in enumerate(self.nics):
8687
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8688
            nic_mac_ini = "nic%d_mac" % idx
8689
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8690

    
8691
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8692

    
8693
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
8694
    if self.op.ip_check:
8695
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8696
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
8697
                                   (self.check_ip, self.op.instance_name),
8698
                                   errors.ECODE_NOTUNIQUE)
8699

    
8700
    #### mac address generation
8701
    # By generating here the mac address both the allocator and the hooks get
8702
    # the real final mac address rather than the 'auto' or 'generate' value.
8703
    # There is a race condition between the generation and the instance object
8704
    # creation, which means that we know the mac is valid now, but we're not
8705
    # sure it will be when we actually add the instance. If things go bad
8706
    # adding the instance will abort because of a duplicate mac, and the
8707
    # creation job will fail.
8708
    for nic in self.nics:
8709
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8710
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8711

    
8712
    #### allocator run
8713

    
8714
    if self.op.iallocator is not None:
8715
      self._RunAllocator()
8716

    
8717
    #### node related checks
8718

    
8719
    # check primary node
8720
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8721
    assert self.pnode is not None, \
8722
      "Cannot retrieve locked node %s" % self.op.pnode
8723
    if pnode.offline:
8724
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8725
                                 pnode.name, errors.ECODE_STATE)
8726
    if pnode.drained:
8727
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8728
                                 pnode.name, errors.ECODE_STATE)
8729
    if not pnode.vm_capable:
8730
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8731
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
8732

    
8733
    self.secondaries = []
8734

    
8735
    # mirror node verification
8736
    if self.op.disk_template in constants.DTS_INT_MIRROR:
8737
      if self.op.snode == pnode.name:
8738
        raise errors.OpPrereqError("The secondary node cannot be the"
8739
                                   " primary node", errors.ECODE_INVAL)
8740
      _CheckNodeOnline(self, self.op.snode)
8741
      _CheckNodeNotDrained(self, self.op.snode)
8742
      _CheckNodeVmCapable(self, self.op.snode)
8743
      self.secondaries.append(self.op.snode)
8744

    
8745
    nodenames = [pnode.name] + self.secondaries
8746

    
8747
    if not self.adopt_disks:
8748
      # Check lv size requirements, if not adopting
8749
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8750
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8751

    
8752
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8753
      all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8754
                                disk[constants.IDISK_ADOPT])
8755
                     for disk in self.disks])
8756
      if len(all_lvs) != len(self.disks):
8757
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
8758
                                   errors.ECODE_INVAL)
8759
      for lv_name in all_lvs:
8760
        try:
8761
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8762
          # to ReserveLV uses the same syntax
8763
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8764
        except errors.ReservationError:
8765
          raise errors.OpPrereqError("LV named %s used by another instance" %
8766
                                     lv_name, errors.ECODE_NOTUNIQUE)
8767

    
8768
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8769
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8770

    
8771
      node_lvs = self.rpc.call_lv_list([pnode.name],
8772
                                       vg_names.payload.keys())[pnode.name]
8773
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8774
      node_lvs = node_lvs.payload
8775

    
8776
      delta = all_lvs.difference(node_lvs.keys())
8777
      if delta:
8778
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
8779
                                   utils.CommaJoin(delta),
8780
                                   errors.ECODE_INVAL)
8781
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8782
      if online_lvs:
8783
        raise errors.OpPrereqError("Online logical volumes found, cannot"
8784
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
8785
                                   errors.ECODE_STATE)
8786
      # update the size of disk based on what is found
8787
      for dsk in self.disks:
8788
        dsk[constants.IDISK_SIZE] = \
8789
          int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8790
                                        dsk[constants.IDISK_ADOPT])][0]))
8791

    
8792
    elif self.op.disk_template == constants.DT_BLOCK:
8793
      # Normalize and de-duplicate device paths
8794
      all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8795
                       for disk in self.disks])
8796
      if len(all_disks) != len(self.disks):
8797
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
8798
                                   errors.ECODE_INVAL)
8799
      baddisks = [d for d in all_disks
8800
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8801
      if baddisks:
8802
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8803
                                   " cannot be adopted" %
8804
                                   (", ".join(baddisks),
8805
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
8806
                                   errors.ECODE_INVAL)
8807

    
8808
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
8809
                                            list(all_disks))[pnode.name]
8810
      node_disks.Raise("Cannot get block device information from node %s" %
8811
                       pnode.name)
8812
      node_disks = node_disks.payload
8813
      delta = all_disks.difference(node_disks.keys())
8814
      if delta:
8815
        raise errors.OpPrereqError("Missing block device(s): %s" %
8816
                                   utils.CommaJoin(delta),
8817
                                   errors.ECODE_INVAL)
8818
      for dsk in self.disks:
8819
        dsk[constants.IDISK_SIZE] = \
8820
          int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8821

    
8822
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8823

    
8824
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8825
    # check OS parameters (remotely)
8826
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8827

    
8828
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8829

    
8830
    # memory check on primary node
8831
    if self.op.start:
8832
      _CheckNodeFreeMemory(self, self.pnode.name,
8833
                           "creating instance %s" % self.op.instance_name,
8834
                           self.be_full[constants.BE_MEMORY],
8835
                           self.op.hypervisor)
8836

    
8837
    self.dry_run_result = list(nodenames)
8838

    
8839
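  # Illustrative sketch (not part of the LU): CheckPrereq() above normalizes
  # each user-supplied disk specification into a complete dictionary before
  # any node is contacted.  Reduced to a helper (the key names follow the
  # IDISK_* constants used above; the default VG value is invented):
  #
  #   def _NormalizeDiskSpec(disk, default_vg="xenvg"):
  #     size = int(disk["size"])               # raises ValueError for "10g"
  #     vg = disk.get("vg", default_vg)
  #     return {
  #       "size": size,
  #       "mode": disk.get("mode", "rw"),
  #       "vg": vg,
  #       "metavg": disk.get("metavg", vg),
  #     }
  #
  # For adopted LVs and block devices the size is later overwritten with the
  # value reported by the node, as done in the adoption branches above.
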
  def Exec(self, feedback_fn):
8840
    """Create and add the instance to the cluster.
8841

8842
    """
8843
    instance = self.op.instance_name
8844
    pnode_name = self.pnode.name
8845

    
8846
    ht_kind = self.op.hypervisor
8847
    if ht_kind in constants.HTS_REQ_PORT:
8848
      network_port = self.cfg.AllocatePort()
8849
    else:
8850
      network_port = None
8851

    
8852
    disks = _GenerateDiskTemplate(self,
8853
                                  self.op.disk_template,
8854
                                  instance, pnode_name,
8855
                                  self.secondaries,
8856
                                  self.disks,
8857
                                  self.instance_file_storage_dir,
8858
                                  self.op.file_driver,
8859
                                  0,
8860
                                  feedback_fn)
8861

    
8862
    iobj = objects.Instance(name=instance, os=self.op.os_type,
8863
                            primary_node=pnode_name,
8864
                            nics=self.nics, disks=disks,
8865
                            disk_template=self.op.disk_template,
8866
                            admin_up=False,
8867
                            network_port=network_port,
8868
                            beparams=self.op.beparams,
8869
                            hvparams=self.op.hvparams,
8870
                            hypervisor=self.op.hypervisor,
8871
                            osparams=self.op.osparams,
8872
                            )
8873

    
8874
    if self.op.tags:
8875
      for tag in self.op.tags:
8876
        iobj.AddTag(tag)
8877

    
8878
    if self.adopt_disks:
8879
      if self.op.disk_template == constants.DT_PLAIN:
8880
        # rename LVs to the newly-generated names; we need to construct
8881
        # 'fake' LV disks with the old data, plus the new unique_id
8882
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8883
        rename_to = []
8884
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
8885
          rename_to.append(t_dsk.logical_id)
8886
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8887
          self.cfg.SetDiskID(t_dsk, pnode_name)
8888
        result = self.rpc.call_blockdev_rename(pnode_name,
8889
                                               zip(tmp_disks, rename_to))
8890
        result.Raise("Failed to rename adoped LVs")
8891
    else:
8892
      feedback_fn("* creating instance disks...")
8893
      try:
8894
        _CreateDisks(self, iobj)
8895
      except errors.OpExecError:
8896
        self.LogWarning("Device creation failed, reverting...")
8897
        try:
8898
          _RemoveDisks(self, iobj)
8899
        finally:
8900
          self.cfg.ReleaseDRBDMinors(instance)
8901
          raise
8902

    
8903
    feedback_fn("adding instance %s to cluster config" % instance)
8904

    
8905
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8906

    
8907
    # Declare that we don't want to remove the instance lock anymore, as we've
8908
    # added the instance to the config
8909
    del self.remove_locks[locking.LEVEL_INSTANCE]
8910

    
8911
    if self.op.mode == constants.INSTANCE_IMPORT:
8912
      # Release unused nodes
8913
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8914
    else:
8915
      # Release all nodes
8916
      _ReleaseLocks(self, locking.LEVEL_NODE)
8917

    
8918
    disk_abort = False
8919
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8920
      feedback_fn("* wiping instance disks...")
8921
      try:
8922
        _WipeDisks(self, iobj)
8923
      except errors.OpExecError, err:
8924
        logging.exception("Wiping disks failed")
8925
        self.LogWarning("Wiping instance disks failed (%s)", err)
8926
        disk_abort = True
8927

    
8928
    if disk_abort:
8929
      # Something is already wrong with the disks, don't do anything else
8930
      pass
8931
    elif self.op.wait_for_sync:
8932
      disk_abort = not _WaitForSync(self, iobj)
8933
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8934
      # make sure the disks are not degraded (still sync-ing is ok)
8935
      feedback_fn("* checking mirrors status")
8936
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8937
    else:
8938
      disk_abort = False
8939

    
8940
    if disk_abort:
8941
      _RemoveDisks(self, iobj)
8942
      self.cfg.RemoveInstance(iobj.name)
8943
      # Make sure the instance lock gets removed
8944
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8945
      raise errors.OpExecError("There are some degraded disks for"
8946
                               " this instance")
8947

    
8948
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8949
      if self.op.mode == constants.INSTANCE_CREATE:
8950
        if not self.op.no_install:
8951
          pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8952
                        not self.op.wait_for_sync)
8953
          if pause_sync:
8954
            feedback_fn("* pausing disk sync to install instance OS")
8955
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8956
                                                              iobj.disks, True)
8957
            for idx, success in enumerate(result.payload):
8958
              if not success:
8959
                logging.warn("pause-sync of instance %s for disk %d failed",
8960
                             instance, idx)
8961

    
8962
          feedback_fn("* running the instance OS create scripts...")
8963
          # FIXME: pass debug option from opcode to backend
8964
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8965
                                                 self.op.debug_level)
8966
          if pause_sync:
8967
            feedback_fn("* resuming disk sync")
8968
            result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8969
                                                              iobj.disks, False)
8970
            for idx, success in enumerate(result.payload):
8971
              if not success:
8972
                logging.warn("resume-sync of instance %s for disk %d failed",
8973
                             instance, idx)
8974

    
8975
          result.Raise("Could not add os for instance %s"
8976
                       " on node %s" % (instance, pnode_name))
8977

    
8978
      elif self.op.mode == constants.INSTANCE_IMPORT:
8979
        feedback_fn("* running the instance OS import scripts...")
8980

    
8981
        transfers = []
8982

    
8983
        for idx, image in enumerate(self.src_images):
8984
          if not image:
8985
            continue
8986

    
8987
          # FIXME: pass debug option from opcode to backend
8988
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8989
                                             constants.IEIO_FILE, (image, ),
8990
                                             constants.IEIO_SCRIPT,
8991
                                             (iobj.disks[idx], idx),
8992
                                             None)
8993
          transfers.append(dt)
8994

    
8995
        import_result = \
8996
          masterd.instance.TransferInstanceData(self, feedback_fn,
8997
                                                self.op.src_node, pnode_name,
8998
                                                self.pnode.secondary_ip,
8999
                                                iobj, transfers)
9000
        if not compat.all(import_result):
9001
          self.LogWarning("Some disks for instance %s on node %s were not"
9002
                          " imported successfully" % (instance, pnode_name))
9003

    
9004
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
9005
        feedback_fn("* preparing remote import...")
9006
        # The source cluster will stop the instance before attempting to make a
9007
        # connection. In some cases stopping an instance can take a long time,
9008
        # hence the shutdown timeout is added to the connection timeout.
9009
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
9010
                           self.op.source_shutdown_timeout)
9011
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9012

    
9013
        assert iobj.primary_node == self.pnode.name
9014
        disk_results = \
9015
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
9016
                                        self.source_x509_ca,
9017
                                        self._cds, timeouts)
9018
        if not compat.all(disk_results):
9019
          # TODO: Should the instance still be started, even if some disks
9020
          # failed to import (valid for local imports, too)?
9021
          self.LogWarning("Some disks for instance %s on node %s were not"
9022
                          " imported successfully" % (instance, pnode_name))
9023

    
9024
        # Run rename script on newly imported instance
9025
        assert iobj.name == instance
9026
        feedback_fn("Running rename script for %s" % instance)
9027
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
9028
                                                   self.source_instance_name,
9029
                                                   self.op.debug_level)
9030
        if result.fail_msg:
9031
          self.LogWarning("Failed to run rename script for %s on node"
9032
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
9033

    
9034
      else:
9035
        # also checked in the prereq part
9036
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
9037
                                     % self.op.mode)
9038

    
9039
    if self.op.start:
9040
      iobj.admin_up = True
9041
      self.cfg.Update(iobj, feedback_fn)
9042
      logging.info("Starting instance %s on node %s", instance, pnode_name)
9043
      feedback_fn("* starting instance...")
9044
      result = self.rpc.call_instance_start(pnode_name, iobj,
9045
                                            None, None, False)
9046
      result.Raise("Could not start instance")
9047

    
9048
    return list(iobj.all_nodes)
9049

    
9050

    
9051
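# Illustrative sketch (not part of the LUs): the tail of
# LUInstanceCreate.Exec() above decides whether to block on disk sync with a
# small ladder that can be read as:
#
#   def _ShouldWaitForSync(wait_for_sync, disk_template, wiped_ok):
#     if not wiped_ok:
#       return None          # wiping already failed, abort without waiting
#     if wait_for_sync:
#       return "full"        # _WaitForSync() until the mirror is clean
#     if disk_template in constants.DTS_INT_MIRROR:
#       return "oneshot"     # only verify the mirror is not degraded
#     return "none"
#
# Only the "full" and "oneshot" outcomes call _WaitForSync(); a degraded
# result in either case removes the freshly created disks and the instance.
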
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


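# Illustrative sketch (not part of the module): a client submitting
# OpInstanceConsole gets back the dictionary produced by _GetInstanceConsole()
# above, i.e. a serialized objects.InstanceConsole.  The "command", "host" and
# "port" fields below are assumptions for illustration (the real field set
# depends on the hypervisor), and RunCommand()/Connect() are stand-ins for
# whatever the client does with them.
#
#   console = cli.SubmitOpCode(opcodes.OpInstanceConsole(instance_name=name))
#   if console.get("command"):
#     RunCommand(console["command"])          # e.g. an SSH-style command line
#   else:
#     Connect(console.get("host"), console.get("port"))   # e.g. VNC endpoint
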
class LUInstanceReplaceDisks(LogicalUnit):
9121
  """Replace the disks of an instance.
9122

9123
  """
9124
  HPATH = "mirrors-replace"
9125
  HTYPE = constants.HTYPE_INSTANCE
9126
  REQ_BGL = False
9127

    
9128
  def CheckArguments(self):
9129
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9130
                                  self.op.iallocator)
9131

    
9132
  def ExpandNames(self):
9133
    self._ExpandAndLockInstance()
9134

    
9135
    assert locking.LEVEL_NODE not in self.needed_locks
9136
    assert locking.LEVEL_NODEGROUP not in self.needed_locks
9137

    
9138
    assert self.op.iallocator is None or self.op.remote_node is None, \
9139
      "Conflicting options"
9140

    
9141
    if self.op.remote_node is not None:
9142
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9143

    
9144
      # Warning: do not remove the locking of the new secondary here
9145
      # unless DRBD8.AddChildren is changed to work in parallel;
9146
      # currently it doesn't since parallel invocations of
9147
      # FindUnusedMinor will conflict
9148
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9149
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9150
    else:
9151
      self.needed_locks[locking.LEVEL_NODE] = []
9152
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9153

    
9154
      if self.op.iallocator is not None:
9155
        # iallocator will select a new node in the same group
9156
        self.needed_locks[locking.LEVEL_NODEGROUP] = []
9157

    
9158
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9159
                                   self.op.iallocator, self.op.remote_node,
9160
                                   self.op.disks, False, self.op.early_release)
9161

    
9162
    self.tasklets = [self.replacer]
9163

    
9164
  def DeclareLocks(self, level):
9165
    if level == locking.LEVEL_NODEGROUP:
9166
      assert self.op.remote_node is None
9167
      assert self.op.iallocator is not None
9168
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9169

    
9170
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
9171
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
9172
        self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9173

    
9174
    elif level == locking.LEVEL_NODE:
9175
      if self.op.iallocator is not None:
9176
        assert self.op.remote_node is None
9177
        assert not self.needed_locks[locking.LEVEL_NODE]
9178

    
9179
        # Lock member nodes of all locked groups
9180
        self.needed_locks[locking.LEVEL_NODE] = [node_name
9181
          for group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)
9182
          for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9183
      else:
9184
        self._LockInstancesNodes()
9185

    
9186
  def BuildHooksEnv(self):
9187
    """Build hooks env.
9188

9189
    This runs on the master, the primary and all the secondaries.
9190

9191
    """
9192
    instance = self.replacer.instance
9193
    env = {
9194
      "MODE": self.op.mode,
9195
      "NEW_SECONDARY": self.op.remote_node,
9196
      "OLD_SECONDARY": instance.secondary_nodes[0],
9197
      }
9198
    env.update(_BuildInstanceHookEnvByObject(self, instance))
9199
    return env
9200

    
9201
  def BuildHooksNodes(self):
9202
    """Build hooks nodes.
9203

9204
    """
9205
    instance = self.replacer.instance
9206
    nl = [
9207
      self.cfg.GetMasterNode(),
9208
      instance.primary_node,
9209
      ]
9210
    if self.op.remote_node is not None:
9211
      nl.append(self.op.remote_node)
9212
    return nl, nl
9213

    
9214
  def CheckPrereq(self):
9215
    """Check prerequisites.
9216

9217
    """
9218
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9219
            self.op.iallocator is None)
9220

    
9221
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
9222
    if owned_groups:
9223
      _CheckInstanceNodeGroups(self.cfg, self.op.instance_name, owned_groups)
9224

    
9225
    return LogicalUnit.CheckPrereq(self)
9226

    
9227

    
9228
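# Illustrative sketch (not part of the LU): the argument rules enforced for
# replace-disks by TLReplaceDisks.CheckArguments() below, written as a table.
# Only the "change secondary" mode may (and must) name a new node, either
# explicitly or via an allocator script:
#
#   mode                    remote_node   iallocator    valid?
#   ---------------------   -----------   -----------   ------
#   replace on primary      -             -             yes
#   replace on secondary    -             -             yes
#   change secondary        node given    -             yes
#   change secondary        -             script given  yes
#   change secondary        -             -             no (need one of them)
#   change secondary        node given    script given  no (not both)
#   any other mode          either given  either given  no
#
# The lock strategy in ExpandNames() follows from this: an explicit new node
# is locked up front, while the iallocator path locks the instance's node
# groups first and narrows down to their member nodes in DeclareLocks().
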
class TLReplaceDisks(Tasklet):
9229
  """Replaces disks for an instance.
9230

9231
  Note: Locking is not within the scope of this class.
9232

9233
  """
9234
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9235
               disks, delay_iallocator, early_release):
9236
    """Initializes this class.
9237

9238
    """
9239
    Tasklet.__init__(self, lu)
9240

    
9241
    # Parameters
9242
    self.instance_name = instance_name
9243
    self.mode = mode
9244
    self.iallocator_name = iallocator_name
9245
    self.remote_node = remote_node
9246
    self.disks = disks
9247
    self.delay_iallocator = delay_iallocator
9248
    self.early_release = early_release
9249

    
9250
    # Runtime data
9251
    self.instance = None
9252
    self.new_node = None
9253
    self.target_node = None
9254
    self.other_node = None
9255
    self.remote_node_info = None
9256
    self.node_secondary_ip = None
9257

    
9258
  @staticmethod
9259
  def CheckArguments(mode, remote_node, iallocator):
9260
    """Helper function for users of this class.
9261

9262
    """
9263
    # check for valid parameter combination
9264
    if mode == constants.REPLACE_DISK_CHG:
9265
      if remote_node is None and iallocator is None:
9266
        raise errors.OpPrereqError("When changing the secondary either an"
9267
                                   " iallocator script must be used or the"
9268
                                   " new node given", errors.ECODE_INVAL)
9269

    
9270
      if remote_node is not None and iallocator is not None:
9271
        raise errors.OpPrereqError("Give either the iallocator or the new"
9272
                                   " secondary, not both", errors.ECODE_INVAL)
9273

    
9274
    elif remote_node is not None or iallocator is not None:
9275
      # Not replacing the secondary
9276
      raise errors.OpPrereqError("The iallocator and new node options can"
9277
                                 " only be used when changing the"
9278
                                 " secondary node", errors.ECODE_INVAL)
9279

    
9280
  @staticmethod
9281
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9282
    """Compute a new secondary node using an IAllocator.
9283

9284
    """
9285
    ial = IAllocator(lu.cfg, lu.rpc,
9286
                     mode=constants.IALLOCATOR_MODE_RELOC,
9287
                     name=instance_name,
9288
                     relocate_from=list(relocate_from))
9289

    
9290
    ial.Run(iallocator_name)
9291

    
9292
    if not ial.success:
9293
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9294
                                 " %s" % (iallocator_name, ial.info),
9295
                                 errors.ECODE_NORES)
9296

    
9297
    if len(ial.result) != ial.required_nodes:
9298
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9299
                                 " of nodes (%s), required %s" %
9300
                                 (iallocator_name,
9301
                                  len(ial.result), ial.required_nodes),
9302
                                 errors.ECODE_FAULT)
9303

    
9304
    remote_node_name = ial.result[0]
9305

    
9306
    lu.LogInfo("Selected new secondary for instance '%s': %s",
9307
               instance_name, remote_node_name)
9308

    
9309
    return remote_node_name
9310

    
9311
  def _FindFaultyDisks(self, node_name):
9312
    """Wrapper for L{_FindFaultyInstanceDisks}.
9313

9314
    """
9315
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9316
                                    node_name, True)
9317

    
9318
  def _CheckDisksActivated(self, instance):
9319
    """Checks if the instance disks are activated.
9320

9321
    @param instance: The instance to check disks
9322
    @return: True if they are activated, False otherwise
9323

9324
    """
9325
    nodes = instance.all_nodes
9326

    
9327
    for idx, dev in enumerate(instance.disks):
9328
      for node in nodes:
9329
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9330
        self.cfg.SetDiskID(dev, node)
9331

    
9332
        result = self.rpc.call_blockdev_find(node, dev)
9333

    
9334
        if result.offline:
9335
          continue
9336
        elif result.fail_msg or not result.payload:
9337
          return False
9338

    
9339
    return True
9340

    
9341
  def CheckPrereq(self):
9342
    """Check prerequisites.
9343

9344
    This checks that the instance is in the cluster.
9345

9346
    """
9347
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9348
    assert instance is not None, \
9349
      "Cannot retrieve locked instance %s" % self.instance_name
9350

    
9351
    if instance.disk_template != constants.DT_DRBD8:
9352
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9353
                                 " instances", errors.ECODE_INVAL)
9354

    
9355
    if len(instance.secondary_nodes) != 1:
9356
      raise errors.OpPrereqError("The instance has a strange layout,"
9357
                                 " expected one secondary but found %d" %
9358
                                 len(instance.secondary_nodes),
9359
                                 errors.ECODE_FAULT)
9360

    
9361
    if not self.delay_iallocator:
9362
      self._CheckPrereq2()
9363

    
9364
  def _CheckPrereq2(self):
9365
    """Check prerequisites, second part.
9366

9367
    This function should always be part of CheckPrereq. It was separated and is
9368
    now called from Exec because during node evacuation iallocator was only
9369
    called with an unmodified cluster model, not taking planned changes into
9370
    account.
9371

9372
    """
9373
    instance = self.instance
9374
    secondary_node = instance.secondary_nodes[0]
9375

    
9376
    if self.iallocator_name is None:
9377
      remote_node = self.remote_node
9378
    else:
9379
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9380
                                       instance.name, instance.secondary_nodes)
9381

    
9382
    if remote_node is None:
9383
      self.remote_node_info = None
9384
    else:
9385
      assert remote_node in self.lu.owned_locks(locking.LEVEL_NODE), \
9386
             "Remote node '%s' is not locked" % remote_node
9387

    
9388
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9389
      assert self.remote_node_info is not None, \
9390
        "Cannot retrieve locked node %s" % remote_node
9391

    
9392
    if remote_node == self.instance.primary_node:
9393
      raise errors.OpPrereqError("The specified node is the primary node of"
9394
                                 " the instance", errors.ECODE_INVAL)
9395

    
9396
    if remote_node == secondary_node:
9397
      raise errors.OpPrereqError("The specified node is already the"
9398
                                 " secondary node of the instance",
9399
                                 errors.ECODE_INVAL)
9400

    
9401
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9402
                                    constants.REPLACE_DISK_CHG):
9403
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
9404
                                 errors.ECODE_INVAL)
9405

    
9406
    if self.mode == constants.REPLACE_DISK_AUTO:
9407
      if not self._CheckDisksActivated(instance):
9408
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
9409
                                   " first" % self.instance_name,
9410
                                   errors.ECODE_STATE)
9411
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
9412
      faulty_secondary = self._FindFaultyDisks(secondary_node)
9413

    
9414
      if faulty_primary and faulty_secondary:
9415
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9416
                                   " one node and can not be repaired"
9417
                                   " automatically" % self.instance_name,
9418
                                   errors.ECODE_STATE)
9419

    
9420
      if faulty_primary:
9421
        self.disks = faulty_primary
9422
        self.target_node = instance.primary_node
9423
        self.other_node = secondary_node
9424
        check_nodes = [self.target_node, self.other_node]
9425
      elif faulty_secondary:
9426
        self.disks = faulty_secondary
9427
        self.target_node = secondary_node
9428
        self.other_node = instance.primary_node
9429
        check_nodes = [self.target_node, self.other_node]
9430
      else:
9431
        self.disks = []
9432
        check_nodes = []
9433

    
9434
    else:
9435
      # Non-automatic modes
9436
      if self.mode == constants.REPLACE_DISK_PRI:
9437
        self.target_node = instance.primary_node
9438
        self.other_node = secondary_node
9439
        check_nodes = [self.target_node, self.other_node]
9440

    
9441
      elif self.mode == constants.REPLACE_DISK_SEC:
9442
        self.target_node = secondary_node
9443
        self.other_node = instance.primary_node
9444
        check_nodes = [self.target_node, self.other_node]
9445

    
9446
      elif self.mode == constants.REPLACE_DISK_CHG:
9447
        self.new_node = remote_node
9448
        self.other_node = instance.primary_node
9449
        self.target_node = secondary_node
9450
        check_nodes = [self.new_node, self.other_node]
9451

    
9452
        _CheckNodeNotDrained(self.lu, remote_node)
9453
        _CheckNodeVmCapable(self.lu, remote_node)
9454

    
9455
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
9456
        assert old_node_info is not None
9457
        if old_node_info.offline and not self.early_release:
9458
          # doesn't make sense to delay the release
9459
          self.early_release = True
9460
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9461
                          " early-release mode", secondary_node)
9462

    
9463
      else:
9464
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9465
                                     self.mode)
9466

    
9467
      # If not specified all disks should be replaced
9468
      if not self.disks:
9469
        self.disks = range(len(self.instance.disks))
9470

    
9471
    for node in check_nodes:
9472
      _CheckNodeOnline(self.lu, node)
9473

    
9474
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
9475
                                                          self.other_node,
9476
                                                          self.target_node]
9477
                              if node_name is not None)
9478

    
9479
    # Release unneeded node locks
9480
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9481

    
9482
    # Release any owned node group
9483
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9484
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9485

    
9486
    # Check whether disks are valid
9487
    for disk_idx in self.disks:
9488
      instance.FindDisk(disk_idx)
9489

    
9490
    # Get secondary node IP addresses
9491
    self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9492
                                  in self.cfg.GetMultiNodeInfo(touched_nodes))
9493

    
9494
  def Exec(self, feedback_fn):
9495
    """Execute disk replacement.
9496

9497
    This dispatches the disk replacement to the appropriate handler.
9498

9499
    """
9500
    if self.delay_iallocator:
9501
      self._CheckPrereq2()
9502

    
9503
    if __debug__:
9504
      # Verify owned locks before starting operation
9505
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9506
      assert set(owned_nodes) == set(self.node_secondary_ip), \
9507
          ("Incorrect node locks, owning %s, expected %s" %
9508
           (owned_nodes, self.node_secondary_ip.keys()))
9509

    
9510
      owned_instances = self.lu.owned_locks(locking.LEVEL_INSTANCE)
9511
      assert list(owned_instances) == [self.instance_name], \
9512
          "Instance '%s' not locked" % self.instance_name
9513

    
9514
      assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9515
          "Should not own any node group lock at this point"
9516

    
9517
    if not self.disks:
9518
      feedback_fn("No disks need replacement")
9519
      return
9520

    
9521
    feedback_fn("Replacing disk(s) %s for %s" %
9522
                (utils.CommaJoin(self.disks), self.instance.name))
9523

    
9524
    activate_disks = (not self.instance.admin_up)
9525

    
9526
    # Activate the instance disks if we're replacing them on a down instance
9527
    if activate_disks:
9528
      _StartInstanceDisks(self.lu, self.instance, True)
9529

    
9530
    try:
9531
      # Should we replace the secondary node?
9532
      if self.new_node is not None:
9533
        fn = self._ExecDrbd8Secondary
9534
      else:
9535
        fn = self._ExecDrbd8DiskOnly
9536

    
9537
      result = fn(feedback_fn)
9538
    finally:
9539
      # Deactivate the instance disks if we're replacing them on a
9540
      # down instance
9541
      if activate_disks:
9542
        _SafeShutdownInstanceDisks(self.lu, self.instance)
9543

    
9544
    if __debug__:
9545
      # Verify owned locks
9546
      owned_nodes = self.lu.owned_locks(locking.LEVEL_NODE)
9547
      nodes = frozenset(self.node_secondary_ip)
9548
      assert ((self.early_release and not owned_nodes) or
9549
              (not self.early_release and not (set(owned_nodes) - nodes))), \
9550
        ("Not owning the correct locks, early_release=%s, owned=%r,"
9551
         " nodes=%r" % (self.early_release, owned_nodes, nodes))
9552

    
9553
    return result
9554

    
9555
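  # Illustrative sketch (not part of the tasklet): Exec() above wraps the
  # actual replacement in an activate/deactivate pair so that replacing the
  # disks of a stopped instance leaves it stopped afterwards:
  #
  #   activate_disks = not instance.admin_up
  #   if activate_disks:
  #     _StartInstanceDisks(lu, instance, True)
  #   try:
  #     result = replace_fn(feedback_fn)    # _ExecDrbd8Secondary/_ExecDrbd8DiskOnly
  #   finally:
  #     if activate_disks:
  #       _SafeShutdownInstanceDisks(lu, instance)
  #
  # The finally block guarantees the shutdown even if the replacement itself
  # raises an exception.
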
  def _CheckVolumeGroup(self, nodes):
9556
    self.lu.LogInfo("Checking volume groups")
9557

    
9558
    vgname = self.cfg.GetVGName()
9559

    
9560
    # Make sure volume group exists on all involved nodes
9561
    results = self.rpc.call_vg_list(nodes)
9562
    if not results:
9563
      raise errors.OpExecError("Can't list volume groups on the nodes")
9564

    
9565
    for node in nodes:
9566
      res = results[node]
9567
      res.Raise("Error checking node %s" % node)
9568
      if vgname not in res.payload:
9569
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
9570
                                 (vgname, node))
9571

    
9572
  def _CheckDisksExistence(self, nodes):
9573
    # Check disk existence
9574
    for idx, dev in enumerate(self.instance.disks):
9575
      if idx not in self.disks:
9576
        continue
9577

    
9578
      for node in nodes:
9579
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9580
        self.cfg.SetDiskID(dev, node)
9581

    
9582
        result = self.rpc.call_blockdev_find(node, dev)
9583

    
9584
        msg = result.fail_msg
9585
        if msg or not result.payload:
9586
          if not msg:
9587
            msg = "disk not found"
9588
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9589
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    """Create new storage on the primary or secondary node.

    This is only used for same-node replaces, not for changing the
    secondary node, hence we don't want to modify the existing disk.

    """
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      vg_data = dev.children[0].logical_id[0]
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vg_data, names[0]))
      vg_meta = dev.children[1].logical_id[0]
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vg_meta, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = [child.Copy() for child in dev.children]
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9672
    """Replace a disk on the primary or secondary for DRBD 8.
9673

9674
    The algorithm for replace is quite complicated:
9675

9676
      1. for each disk to be replaced:
9677

9678
        1. create new LVs on the target node with unique names
9679
        1. detach old LVs from the drbd device
9680
        1. rename old LVs to name_replaced.<time_t>
9681
        1. rename new LVs to old LVs
9682
        1. attach the new LVs (with the old names now) to the drbd device
9683

9684
      1. wait for sync across all devices
9685

9686
      1. for each modified disk:
9687

9688
        1. remove old LVs (which have the name name_replaced.<time_t>)
9689

9690
    Failures are not very well handled.
9691

9692
    """
9693
    steps_total = 6
9694

    
9695
    # Step: check device activation
9696
    self.lu.LogStep(1, steps_total, "Check device existence")
9697
    self._CheckDisksExistence([self.other_node, self.target_node])
9698
    self._CheckVolumeGroup([self.target_node, self.other_node])
9699

    
9700
    # Step: check other node consistency
9701
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9702
    self._CheckDisksConsistency(self.other_node,
9703
                                self.other_node == self.instance.primary_node,
9704
                                False)
9705

    
9706
    # Step: create new storage
9707
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9708
    iv_names = self._CreateNewStorage(self.target_node)
9709

    
9710
    # Step: for each lv, detach+rename*2+attach
9711
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9712
    for dev, old_lvs, new_lvs in iv_names.itervalues():
9713
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9714

    
9715
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9716
                                                     old_lvs)
9717
      result.Raise("Can't detach drbd from local storage on node"
9718
                   " %s for device %s" % (self.target_node, dev.iv_name))
9719
      #dev.children = []
9720
      #cfg.Update(instance)
9721

    
9722
      # ok, we created the new LVs, so now we know we have the needed
9723
      # storage; as such, we proceed on the target node to rename
9724
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9725
      # using the assumption that logical_id == physical_id (which in
9726
      # turn is the unique_id on that node)
9727

    
9728
      # FIXME(iustin): use a better name for the replaced LVs
9729
      temp_suffix = int(time.time())
9730
      ren_fn = lambda d, suff: (d.physical_id[0],
9731
                                d.physical_id[1] + "_replaced-%s" % suff)
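      # Note: for an LV the physical_id is its (vg_name, lv_name) pair, so an
      # old data LV named e.g. "<uuid>.disk0_data" is parked as
      # "<uuid>.disk0_data_replaced-<time_t>" until it is removed later on.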
9732

    
9733
      # Build the rename list based on what LVs exist on the node
9734
      rename_old_to_new = []
9735
      for to_ren in old_lvs:
9736
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9737
        if not result.fail_msg and result.payload:
9738
          # device exists
9739
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9740

    
9741
      self.lu.LogInfo("Renaming the old LVs on the target node")
9742
      result = self.rpc.call_blockdev_rename(self.target_node,
9743
                                             rename_old_to_new)
9744
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
9745

    
9746
      # Now we rename the new LVs to the old LVs
9747
      self.lu.LogInfo("Renaming the new LVs on the target node")
9748
      rename_new_to_old = [(new, old.physical_id)
9749
                           for old, new in zip(old_lvs, new_lvs)]
9750
      result = self.rpc.call_blockdev_rename(self.target_node,
9751
                                             rename_new_to_old)
9752
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
9753

    
9754
      # Intermediate steps of in memory modifications
9755
      for old, new in zip(old_lvs, new_lvs):
9756
        new.logical_id = old.logical_id
9757
        self.cfg.SetDiskID(new, self.target_node)
9758

    
9759
      # We need to modify old_lvs so that removal later removes the
9760
      # right LVs, not the newly added ones; note that old_lvs is a
9761
      # copy here
9762
      for disk in old_lvs:
9763
        disk.logical_id = ren_fn(disk, temp_suffix)
9764
        self.cfg.SetDiskID(disk, self.target_node)
9765

    
9766
      # Now that the new lvs have the old name, we can add them to the device
9767
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9768
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9769
                                                  new_lvs)
9770
      msg = result.fail_msg
9771
      if msg:
9772
        for new_lv in new_lvs:
9773
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
9774
                                               new_lv).fail_msg
9775
          if msg2:
9776
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9777
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
9779
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9780

    
9781
    cstep = 5
9782
    if self.early_release:
9783
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9784
      cstep += 1
9785
      self._RemoveOldStorage(self.target_node, iv_names)
9786
      # WARNING: we release both node locks here, do not do other RPCs
9787
      # than WaitForSync to the primary node
9788
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9789
                    names=[self.target_node, self.other_node])
9790

    
9791
    # Wait for sync
9792
    # This can fail as the old devices are degraded and _WaitForSync
9793
    # does a combined result over all disks, so we don't check its return value
9794
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9795
    cstep += 1
9796
    _WaitForSync(self.lu, self.instance)
9797

    
9798
    # Check all devices manually
9799
    self._CheckDevices(self.instance.primary_node, iv_names)
9800

    
9801
    # Step: remove old storage
9802
    if not self.early_release:
9803
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9804
      cstep += 1
9805
      self._RemoveOldStorage(self.target_node, iv_names)
9806

    
9807
  def _ExecDrbd8Secondary(self, feedback_fn):
9808
    """Replace the secondary node for DRBD 8.
9809

9810
    The algorithm for replace is quite complicated:
9811
      - for all disks of the instance:
9812
        - create new LVs on the new node with same names
9813
        - shutdown the drbd device on the old secondary
9814
        - disconnect the drbd network on the primary
9815
        - create the drbd device on the new secondary
9816
        - network attach the drbd on the primary, using an artifice:
9817
          the drbd code for Attach() will connect to the network if it
9818
          finds a device which is connected to the good local disks but
9819
          not network enabled
9820
      - wait for sync across all devices
9821
      - remove all disks from the old secondary
9822

9823
    Failures are not very well handled.
9824

9825
    """
9826
    steps_total = 6
9827

    
9828
    # Step: check device activation
9829
    self.lu.LogStep(1, steps_total, "Check device existence")
9830
    self._CheckDisksExistence([self.instance.primary_node])
9831
    self._CheckVolumeGroup([self.instance.primary_node])
9832

    
9833
    # Step: check other node consistency
9834
    self.lu.LogStep(2, steps_total, "Check peer consistency")
9835
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
9836

    
9837
    # Step: create new storage
9838
    self.lu.LogStep(3, steps_total, "Allocate new storage")
9839
    for idx, dev in enumerate(self.instance.disks):
9840
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9841
                      (self.new_node, idx))
9842
      # we pass force_create=True to force LVM creation
9843
      for new_lv in dev.children:
9844
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9845
                        _GetInstanceInfoText(self.instance), False)
9846

    
9847
    # Step 4: drbd minors and drbd setup changes
9848
    # after this, we must manually remove the drbd minors on both the
9849
    # error and the success paths
9850
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9851
    minors = self.cfg.AllocateDRBDMinor([self.new_node
9852
                                         for dev in self.instance.disks],
9853
                                        self.instance.name)
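    # AllocateDRBDMinor gets one entry per instance disk, all pointing at the
    # new secondary, so one fresh minor is reserved on that node per disk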
9854
    logging.debug("Allocated minors %r", minors)
9855

    
9856
    iv_names = {}
9857
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9858
      self.lu.LogInfo("Activating a new drbd on %s for disk/%d" %
9859
                      (self.new_node, idx))
9860
      # create new devices on new_node; note that we create two IDs:
9861
      # one without port, so the drbd will be activated without
9862
      # networking information on the new node at this stage, and one
9863
      # with network, for the latter activation in step 4
9864
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9865
      if self.instance.primary_node == o_node1:
9866
        p_minor = o_minor1
9867
      else:
9868
        assert self.instance.primary_node == o_node2, "Three-node instance?"
9869
        p_minor = o_minor2
9870

    
9871
      new_alone_id = (self.instance.primary_node, self.new_node, None,
9872
                      p_minor, new_minor, o_secret)
9873
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
9874
                    p_minor, new_minor, o_secret)
9875

    
9876
      iv_names[idx] = (dev, dev.children, new_net_id)
9877
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9878
                    new_net_id)
9879
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9880
                              logical_id=new_alone_id,
9881
                              children=dev.children,
9882
                              size=dev.size)
9883
      try:
9884
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9885
                              _GetInstanceInfoText(self.instance), False)
9886
      except errors.GenericError:
9887
        self.cfg.ReleaseDRBDMinors(self.instance.name)
9888
        raise
9889

    
9890
    # We have new devices, shutdown the drbd on the old secondary
9891
    for idx, dev in enumerate(self.instance.disks):
9892
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9893
      self.cfg.SetDiskID(dev, self.target_node)
9894
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9895
      if msg:
9896
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
9898
                           hint=("Please cleanup this device manually as"
9899
                                 " soon as possible"))
9900

    
9901
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9902
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9903
                                               self.node_secondary_ip,
9904
                                               self.instance.disks)\
9905
                                              [self.instance.primary_node]
9906

    
9907
    msg = result.fail_msg
9908
    if msg:
9909
      # detaches didn't succeed (unlikely)
9910
      self.cfg.ReleaseDRBDMinors(self.instance.name)
9911
      raise errors.OpExecError("Can't detach the disks from the network on"
9912
                               " old node: %s" % (msg,))
9913

    
9914
    # if we managed to detach at least one, we update all the disks of
9915
    # the instance to point to the new secondary
9916
    self.lu.LogInfo("Updating instance configuration")
9917
    for dev, _, new_logical_id in iv_names.itervalues():
9918
      dev.logical_id = new_logical_id
9919
      self.cfg.SetDiskID(dev, self.instance.primary_node)
9920

    
9921
    self.cfg.Update(self.instance, feedback_fn)
9922

    
9923
    # and now perform the drbd attach
9924
    self.lu.LogInfo("Attaching primary drbds to new secondary"
9925
                    " (standalone => connected)")
9926
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9927
                                            self.new_node],
9928
                                           self.node_secondary_ip,
9929
                                           self.instance.disks,
9930
                                           self.instance.name,
9931
                                           False)
9932
    for to_node, to_result in result.items():
9933
      msg = to_result.fail_msg
9934
      if msg:
9935
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9936
                           to_node, msg,
9937
                           hint=("please do a gnt-instance info to see the"
9938
                                 " status of disks"))
9939
    cstep = 5
9940
    if self.early_release:
9941
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9942
      cstep += 1
9943
      self._RemoveOldStorage(self.target_node, iv_names)
9944
      # WARNING: we release all node locks here, do not do other RPCs
9945
      # than WaitForSync to the primary node
9946
      _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9947
                    names=[self.instance.primary_node,
9948
                           self.target_node,
9949
                           self.new_node])
9950

    
9951
    # Wait for sync
9952
    # This can fail as the old devices are degraded and _WaitForSync
9953
    # does a combined result over all disks, so we don't check its return value
9954
    self.lu.LogStep(cstep, steps_total, "Sync devices")
9955
    cstep += 1
9956
    _WaitForSync(self.lu, self.instance)
9957

    
9958
    # Check all devices manually
9959
    self._CheckDevices(self.instance.primary_node, iv_names)
9960

    
9961
    # Step: remove old storage
9962
    if not self.early_release:
9963
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
9964
      self._RemoveOldStorage(self.target_node, iv_names)
9965

    
9966

    
9967
class LURepairNodeStorage(NoHooksLU):
9968
  """Repairs the volume group on a node.
9969

9970
  """
9971
  REQ_BGL = False
9972

    
9973
  def CheckArguments(self):
9974
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9975

    
9976
    storage_type = self.op.storage_type
9977

    
9978
    if (constants.SO_FIX_CONSISTENCY not in
9979
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9980
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
9981
                                 " repaired" % storage_type,
9982
                                 errors.ECODE_INVAL)
9983

    
9984
  def ExpandNames(self):
9985
    self.needed_locks = {
9986
      locking.LEVEL_NODE: [self.op.node_name],
9987
      }
9988

    
9989
  def _CheckFaultyDisks(self, instance, node_name):
9990
    """Ensure faulty disks abort the opcode or at least warn."""
9991
    try:
9992
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9993
                                  node_name, True):
9994
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9995
                                   " node '%s'" % (instance.name, node_name),
9996
                                   errors.ECODE_STATE)
9997
    except errors.OpPrereqError, err:
9998
      if self.op.ignore_consistency:
9999
        self.proc.LogWarning(str(err.args[0]))
10000
      else:
10001
        raise
10002

    
10003
  def CheckPrereq(self):
10004
    """Check prerequisites.
10005

10006
    """
10007
    # Check whether any instance on this node has faulty disks
10008
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
10009
      if not inst.admin_up:
10010
        continue
10011
      check_nodes = set(inst.all_nodes)
10012
      check_nodes.discard(self.op.node_name)
10013
      for inst_node_name in check_nodes:
10014
        self._CheckFaultyDisks(inst, inst_node_name)
10015

    
10016
  def Exec(self, feedback_fn):
10017
    feedback_fn("Repairing storage unit '%s' on %s ..." %
10018
                (self.op.name, self.op.node_name))
10019

    
10020
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
10021
    result = self.rpc.call_storage_execute(self.op.node_name,
10022
                                           self.op.storage_type, st_args,
10023
                                           self.op.name,
10024
                                           constants.SO_FIX_CONSISTENCY)
10025
    result.Raise("Failed to repair storage unit '%s' on %s" %
10026
                 (self.op.name, self.op.node_name))
10027

    
10028

    
10029
class LUNodeEvacuate(NoHooksLU):
10030
  """Evacuates instances off a list of nodes.
10031

10032
  """
10033
  REQ_BGL = False
10034

    
10035
  def CheckArguments(self):
10036
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
10037

    
10038
  def ExpandNames(self):
10039
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
10040

    
10041
    if self.op.remote_node is not None:
10042
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10043
      assert self.op.remote_node
10044

    
10045
      if self.op.remote_node == self.op.node_name:
10046
        raise errors.OpPrereqError("Can not use evacuated node as a new"
10047
                                   " secondary node", errors.ECODE_INVAL)
10048

    
10049
      if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
10050
        raise errors.OpPrereqError("Without the use of an iallocator only"
10051
                                   " secondary instances can be evacuated",
10052
                                   errors.ECODE_INVAL)
10053

    
10054
    # Declare locks
10055
    self.share_locks = _ShareAll()
10056
    self.needed_locks = {
10057
      locking.LEVEL_INSTANCE: [],
10058
      locking.LEVEL_NODEGROUP: [],
10059
      locking.LEVEL_NODE: [],
10060
      }
10061

    
10062
    if self.op.remote_node is None:
10063
      # Iallocator will choose any node(s) in the same group
10064
      group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10065
    else:
10066
      group_nodes = frozenset([self.op.remote_node])
10067

    
10068
    # Determine nodes to be locked
10069
    self.lock_nodes = set([self.op.node_name]) | group_nodes
10070

    
10071
  def _DetermineInstances(self):
10072
    """Builds list of instances to operate on.
10073

10074
    """
10075
    assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10076

    
10077
    if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10078
      # Primary instances only
10079
      inst_fn = _GetNodePrimaryInstances
10080
      assert self.op.remote_node is None, \
10081
        "Evacuating primary instances requires iallocator"
10082
    elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10083
      # Secondary instances only
10084
      inst_fn = _GetNodeSecondaryInstances
10085
    else:
10086
      # All instances
10087
      assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10088
      inst_fn = _GetNodeInstances
10089

    
10090
    return inst_fn(self.cfg, self.op.node_name)
10091

    
10092
  def DeclareLocks(self, level):
10093
    if level == locking.LEVEL_INSTANCE:
10094
      # Lock instances optimistically, needs verification once node and group
10095
      # locks have been acquired
10096
      self.needed_locks[locking.LEVEL_INSTANCE] = \
10097
        set(i.name for i in self._DetermineInstances())
10098

    
10099
    elif level == locking.LEVEL_NODEGROUP:
10100
      # Lock node groups optimistically, needs verification once nodes have
10101
      # been acquired
10102
      self.needed_locks[locking.LEVEL_NODEGROUP] = \
10103
        self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10104

    
10105
    elif level == locking.LEVEL_NODE:
10106
      self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10107

    
10108
  def CheckPrereq(self):
10109
    # Verify locks
10110
    owned_instances = self.owned_locks(locking.LEVEL_INSTANCE)
10111
    owned_nodes = self.owned_locks(locking.LEVEL_NODE)
10112
    owned_groups = self.owned_locks(locking.LEVEL_NODEGROUP)
10113

    
10114
    assert owned_nodes == self.lock_nodes
10115

    
10116
    wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10117
    if owned_groups != wanted_groups:
10118
      raise errors.OpExecError("Node groups changed since locks were acquired,"
10119
                               " current groups are '%s', used to be '%s'" %
10120
                               (utils.CommaJoin(wanted_groups),
10121
                                utils.CommaJoin(owned_groups)))
10122

    
10123
    # Determine affected instances
10124
    self.instances = self._DetermineInstances()
10125
    self.instance_names = [i.name for i in self.instances]
10126

    
10127
    if set(self.instance_names) != owned_instances:
10128
      raise errors.OpExecError("Instances on node '%s' changed since locks"
10129
                               " were acquired, current instances are '%s',"
10130
                               " used to be '%s'" %
10131
                               (self.op.node_name,
10132
                                utils.CommaJoin(self.instance_names),
10133
                                utils.CommaJoin(owned_instances)))
10134

    
10135
    if self.instance_names:
10136
      self.LogInfo("Evacuating instances from node '%s': %s",
10137
                   self.op.node_name,
10138
                   utils.CommaJoin(utils.NiceSort(self.instance_names)))
10139
    else:
10140
      self.LogInfo("No instances to evacuate from node '%s'",
10141
                   self.op.node_name)
10142

    
10143
    if self.op.remote_node is not None:
10144
      for i in self.instances:
10145
        if i.primary_node == self.op.remote_node:
10146
          raise errors.OpPrereqError("Node %s is the primary node of"
10147
                                     " instance %s, cannot use it as"
10148
                                     " secondary" %
10149
                                     (self.op.remote_node, i.name),
10150
                                     errors.ECODE_INVAL)
10151

    
10152
  def Exec(self, feedback_fn):
10153
    assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10154

    
10155
    if not self.instance_names:
10156
      # No instances to evacuate
10157
      jobs = []
10158

    
10159
    elif self.op.iallocator is not None:
10160
      # TODO: Implement relocation to other group
10161
      ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10162
                       evac_mode=self.op.mode,
10163
                       instances=list(self.instance_names))
10164

    
10165
      ial.Run(self.op.iallocator)
10166

    
10167
      if not ial.success:
10168
        raise errors.OpPrereqError("Can't compute node evacuation using"
10169
                                   " iallocator '%s': %s" %
10170
                                   (self.op.iallocator, ial.info),
10171
                                   errors.ECODE_NORES)
10172

    
10173
      jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10174

    
10175
    elif self.op.remote_node is not None:
10176
      assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10177
      jobs = [
10178
        [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10179
                                        remote_node=self.op.remote_node,
10180
                                        disks=[],
10181
                                        mode=constants.REPLACE_DISK_CHG,
10182
                                        early_release=self.op.early_release)]
10183
        for instance_name in self.instance_names
10184
        ]
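      # Each instance gets its own single-opcode job, so the individual
      # replace-disks operations can be scheduled independently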
10185

    
10186
    else:
10187
      raise errors.ProgrammerError("No iallocator or remote node")
10188

    
10189
    return ResultWithJobs(jobs)
10190

    
10191

    
10192
def _SetOpEarlyRelease(early_release, op):
10193
  """Sets C{early_release} flag on opcodes if available.
10194

10195
  """
10196
  try:
10197
    op.early_release = early_release
10198
  except AttributeError:
10199
    assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10200

    
10201
  return op
10202

    
10203

    
10204
def _NodeEvacDest(use_nodes, group, nodes):
10205
  """Returns group or nodes depending on caller's choice.
10206

10207
  """
10208
  if use_nodes:
10209
    return utils.CommaJoin(nodes)
10210
  else:
10211
    return group
10212

    
10213

    
10214
def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10215
  """Unpacks the result of change-group and node-evacuate iallocator requests.
10216

10217
  Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10218
  L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10219

10220
  @type lu: L{LogicalUnit}
10221
  @param lu: Logical unit instance
10222
  @type alloc_result: tuple/list
10223
  @param alloc_result: Result from iallocator
10224
  @type early_release: bool
10225
  @param early_release: Whether to release locks early if possible
10226
  @type use_nodes: bool
10227
  @param use_nodes: Whether to display node names instead of groups
10228

10229
  """
10230
  (moved, failed, jobs) = alloc_result
10231

    
10232
  if failed:
10233
    lu.LogWarning("Unable to evacuate instances %s",
10234
                  utils.CommaJoin("%s (%s)" % (name, reason)
10235
                                  for (name, reason) in failed))
10236

    
10237
  if moved:
10238
    lu.LogInfo("Instances to be moved: %s",
10239
               utils.CommaJoin("%s (to %s)" %
10240
                               (name, _NodeEvacDest(use_nodes, group, nodes))
10241
                               for (name, group, nodes) in moved))
10242

    
10243
  return [map(compat.partial(_SetOpEarlyRelease, early_release),
10244
              map(opcodes.OpCode.LoadOpCode, ops))
10245
          for ops in jobs]
10246

    
10247

    
10248
class LUInstanceGrowDisk(LogicalUnit):
10249
  """Grow a disk of an instance.
10250

10251
  """
10252
  HPATH = "disk-grow"
10253
  HTYPE = constants.HTYPE_INSTANCE
10254
  REQ_BGL = False
10255

    
10256
  def ExpandNames(self):
10257
    self._ExpandAndLockInstance()
10258
    self.needed_locks[locking.LEVEL_NODE] = []
10259
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10260

    
10261
  def DeclareLocks(self, level):
10262
    if level == locking.LEVEL_NODE:
10263
      self._LockInstancesNodes()
10264

    
10265
  def BuildHooksEnv(self):
10266
    """Build hooks env.
10267

10268
    This runs on the master, the primary and all the secondaries.
10269

10270
    """
10271
    env = {
10272
      "DISK": self.op.disk,
10273
      "AMOUNT": self.op.amount,
10274
      }
10275
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10276
    return env
10277

    
10278
  def BuildHooksNodes(self):
10279
    """Build hooks nodes.
10280

10281
    """
10282
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10283
    return (nl, nl)
10284

    
10285
  def CheckPrereq(self):
10286
    """Check prerequisites.
10287

10288
    This checks that the instance is in the cluster.
10289

10290
    """
10291
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10292
    assert instance is not None, \
10293
      "Cannot retrieve locked instance %s" % self.op.instance_name
10294
    nodenames = list(instance.all_nodes)
10295
    for node in nodenames:
10296
      _CheckNodeOnline(self, node)
10297

    
10298
    self.instance = instance
10299

    
10300
    if instance.disk_template not in constants.DTS_GROWABLE:
10301
      raise errors.OpPrereqError("Instance's disk layout does not support"
10302
                                 " growing", errors.ECODE_INVAL)
10303

    
10304
    self.disk = instance.FindDisk(self.op.disk)
10305

    
10306
    if instance.disk_template not in (constants.DT_FILE,
10307
                                      constants.DT_SHARED_FILE):
10308
      # TODO: check the free disk space for file, when that feature will be
10309
      # supported
10310
      _CheckNodesFreeDiskPerVG(self, nodenames,
10311
                               self.disk.ComputeGrowth(self.op.amount))
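      # ComputeGrowth returns the extra space needed per volume group, which
      # is then checked against the free space reported by each node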
10312

    
10313
  def Exec(self, feedback_fn):
10314
    """Execute disk grow.
10315

10316
    """
10317
    instance = self.instance
10318
    disk = self.disk
10319

    
10320
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10321
    if not disks_ok:
10322
      raise errors.OpExecError("Cannot activate block device to grow")
10323

    
10324
    # First run all grow ops in dry-run mode
10325
    for node in instance.all_nodes:
10326
      self.cfg.SetDiskID(disk, node)
10327
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10328
      result.Raise("Grow request failed to node %s" % node)
10329

    
10330
    # We know that (as far as we can test) operations across different
10331
    # nodes will succeed, time to run it for real
10332
    for node in instance.all_nodes:
10333
      self.cfg.SetDiskID(disk, node)
10334
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10335
      result.Raise("Grow request failed to node %s" % node)
10336

    
10337
      # TODO: Rewrite code to work properly
10338
      # DRBD goes into sync mode for a short amount of time after executing the
10339
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10340
      # calling "resize" in sync mode fails. Sleeping for a short amount of
10341
      # time is a work-around.
10342
      time.sleep(5)
10343

    
10344
    disk.RecordGrow(self.op.amount)
10345
    self.cfg.Update(instance, feedback_fn)
10346
    if self.op.wait_for_sync:
10347
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
10348
      if disk_abort:
10349
        self.proc.LogWarning("Disk sync-ing has not returned a good"
10350
                             " status; please check the instance")
10351
      if not instance.admin_up:
10352
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10353
    elif not instance.admin_up:
10354
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
10355
                           " not supposed to be running because no wait for"
10356
                           " sync mode was requested")
10357

    
10358

    
10359
class LUInstanceQueryData(NoHooksLU):
10360
  """Query runtime instance data.
10361

10362
  """
10363
  REQ_BGL = False
10364

    
10365
  def ExpandNames(self):
10366
    self.needed_locks = {}
10367

    
10368
    # Use locking if requested or when non-static information is wanted
10369
    if not (self.op.static or self.op.use_locking):
10370
      self.LogWarning("Non-static data requested, locks need to be acquired")
10371
      self.op.use_locking = True
10372

    
10373
    if self.op.instances or not self.op.use_locking:
10374
      # Expand instance names right here
10375
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
10376
    else:
10377
      # Will use acquired locks
10378
      self.wanted_names = None
10379

    
10380
    if self.op.use_locking:
10381
      self.share_locks = _ShareAll()
10382

    
10383
      if self.wanted_names is None:
10384
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10385
      else:
10386
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10387

    
10388
      self.needed_locks[locking.LEVEL_NODE] = []
10389
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10390

    
10391
  def DeclareLocks(self, level):
10392
    if self.op.use_locking and level == locking.LEVEL_NODE:
10393
      self._LockInstancesNodes()
10394

    
10395
  def CheckPrereq(self):
10396
    """Check prerequisites.
10397

10398
    This only checks the optional instance list against the existing names.
10399

10400
    """
10401
    if self.wanted_names is None:
10402
      assert self.op.use_locking, "Locking was not used"
10403
      self.wanted_names = self.owned_locks(locking.LEVEL_INSTANCE)
10404

    
10405
    self.wanted_instances = \
10406
        map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10407

    
10408
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device.
10410

10411
    """
10412
    if self.op.static or not node:
10413
      return None
10414

    
10415
    self.cfg.SetDiskID(dev, node)
10416

    
10417
    result = self.rpc.call_blockdev_find(node, dev)
10418
    if result.offline:
10419
      return None
10420

    
10421
    result.Raise("Can't compute disk status for %s" % instance_name)
10422

    
10423
    status = result.payload
10424
    if status is None:
10425
      return None
10426

    
10427
    return (status.dev_path, status.major, status.minor,
10428
            status.sync_percent, status.estimated_time,
10429
            status.is_degraded, status.ldisk_status)
10430

    
10431
  def _ComputeDiskStatus(self, instance, snode, dev):
10432
    """Compute block device status.
10433

10434
    """
10435
    if dev.dev_type in constants.LDS_DRBD:
10436
      # we change the snode then (otherwise we use the one passed in)
10437
      if dev.logical_id[0] == instance.primary_node:
10438
        snode = dev.logical_id[1]
10439
      else:
10440
        snode = dev.logical_id[0]
10441

    
10442
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10443
                                              instance.name, dev)
10444
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10445

    
10446
    if dev.children:
10447
      dev_children = map(compat.partial(self._ComputeDiskStatus,
10448
                                        instance, snode),
10449
                         dev.children)
10450
    else:
10451
      dev_children = []
10452

    
10453
    return {
10454
      "iv_name": dev.iv_name,
10455
      "dev_type": dev.dev_type,
10456
      "logical_id": dev.logical_id,
10457
      "physical_id": dev.physical_id,
10458
      "pstatus": dev_pstatus,
10459
      "sstatus": dev_sstatus,
10460
      "children": dev_children,
10461
      "mode": dev.mode,
10462
      "size": dev.size,
10463
      }
10464

    
10465
  def Exec(self, feedback_fn):
10466
    """Gather and return data"""
10467
    result = {}
10468

    
10469
    cluster = self.cfg.GetClusterInfo()
10470

    
10471
    pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10472
                                          for i in self.wanted_instances)
10473
    for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10474
      if self.op.static or pnode.offline:
10475
        remote_state = None
10476
        if pnode.offline:
10477
          self.LogWarning("Primary node %s is marked offline, returning static"
10478
                          " information only for instance %s" %
10479
                          (pnode.name, instance.name))
10480
      else:
10481
        remote_info = self.rpc.call_instance_info(instance.primary_node,
10482
                                                  instance.name,
10483
                                                  instance.hypervisor)
10484
        remote_info.Raise("Error checking node %s" % instance.primary_node)
10485
        remote_info = remote_info.payload
10486
        if remote_info and "state" in remote_info:
10487
          remote_state = "up"
10488
        else:
10489
          remote_state = "down"
10490

    
10491
      if instance.admin_up:
10492
        config_state = "up"
10493
      else:
10494
        config_state = "down"
10495

    
10496
      disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10497
                  instance.disks)
10498

    
10499
      result[instance.name] = {
10500
        "name": instance.name,
10501
        "config_state": config_state,
10502
        "run_state": remote_state,
10503
        "pnode": instance.primary_node,
10504
        "snodes": instance.secondary_nodes,
10505
        "os": instance.os,
10506
        # this happens to be the same format used for hooks
10507
        "nics": _NICListToTuple(self, instance.nics),
10508
        "disk_template": instance.disk_template,
10509
        "disks": disks,
10510
        "hypervisor": instance.hypervisor,
10511
        "network_port": instance.network_port,
10512
        "hv_instance": instance.hvparams,
10513
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
10514
        "be_instance": instance.beparams,
10515
        "be_actual": cluster.FillBE(instance),
10516
        "os_instance": instance.osparams,
10517
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10518
        "serial_no": instance.serial_no,
10519
        "mtime": instance.mtime,
10520
        "ctime": instance.ctime,
10521
        "uuid": instance.uuid,
10522
        }
10523

    
10524
    return result
10525

    
10526

    
10527
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
10529

10530
  """
10531
  HPATH = "instance-modify"
10532
  HTYPE = constants.HTYPE_INSTANCE
10533
  REQ_BGL = False
10534

    
10535
  def CheckArguments(self):
10536
    if not (self.op.nics or self.op.disks or self.op.disk_template or
10537
            self.op.hvparams or self.op.beparams or self.op.os_name):
10538
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10539

    
10540
    if self.op.hvparams:
10541
      _CheckGlobalHvParams(self.op.hvparams)
10542

    
10543
    # Disk validation
10544
    disk_addremove = 0
10545
    for disk_op, disk_dict in self.op.disks:
10546
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10547
      if disk_op == constants.DDM_REMOVE:
10548
        disk_addremove += 1
10549
        continue
10550
      elif disk_op == constants.DDM_ADD:
10551
        disk_addremove += 1
10552
      else:
10553
        if not isinstance(disk_op, int):
10554
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10555
        if not isinstance(disk_dict, dict):
10556
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10557
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10558

    
10559
      if disk_op == constants.DDM_ADD:
10560
        mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10561
        if mode not in constants.DISK_ACCESS_SET:
10562
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10563
                                     errors.ECODE_INVAL)
10564
        size = disk_dict.get(constants.IDISK_SIZE, None)
10565
        if size is None:
10566
          raise errors.OpPrereqError("Required disk parameter size missing",
10567
                                     errors.ECODE_INVAL)
10568
        try:
10569
          size = int(size)
10570
        except (TypeError, ValueError), err:
10571
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10572
                                     str(err), errors.ECODE_INVAL)
10573
        disk_dict[constants.IDISK_SIZE] = size
10574
      else:
10575
        # modification of disk
10576
        if constants.IDISK_SIZE in disk_dict:
10577
          raise errors.OpPrereqError("Disk size change not possible, use"
10578
                                     " grow-disk", errors.ECODE_INVAL)
10579

    
10580
    if disk_addremove > 1:
10581
      raise errors.OpPrereqError("Only one disk add or remove operation"
10582
                                 " supported at a time", errors.ECODE_INVAL)
10583

    
10584
    if self.op.disks and self.op.disk_template is not None:
10585
      raise errors.OpPrereqError("Disk template conversion and other disk"
10586
                                 " changes not supported at the same time",
10587
                                 errors.ECODE_INVAL)
10588

    
10589
    if (self.op.disk_template and
10590
        self.op.disk_template in constants.DTS_INT_MIRROR and
10591
        self.op.remote_node is None):
10592
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
10593
                                 " one requires specifying a secondary node",
10594
                                 errors.ECODE_INVAL)
10595

    
10596
    # NIC validation
10597
    nic_addremove = 0
10598
    for nic_op, nic_dict in self.op.nics:
10599
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10600
      if nic_op == constants.DDM_REMOVE:
10601
        nic_addremove += 1
10602
        continue
10603
      elif nic_op == constants.DDM_ADD:
10604
        nic_addremove += 1
10605
      else:
10606
        if not isinstance(nic_op, int):
10607
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10608
        if not isinstance(nic_dict, dict):
10609
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10610
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10611

    
10612
      # nic_dict should be a dict
10613
      nic_ip = nic_dict.get(constants.INIC_IP, None)
10614
      if nic_ip is not None:
10615
        if nic_ip.lower() == constants.VALUE_NONE:
10616
          nic_dict[constants.INIC_IP] = None
10617
        else:
10618
          if not netutils.IPAddress.IsValid(nic_ip):
10619
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10620
                                       errors.ECODE_INVAL)
10621

    
10622
      nic_bridge = nic_dict.get("bridge", None)
10623
      nic_link = nic_dict.get(constants.INIC_LINK, None)
10624
      if nic_bridge and nic_link:
10625
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10626
                                   " at the same time", errors.ECODE_INVAL)
10627
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10628
        nic_dict["bridge"] = None
10629
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10630
        nic_dict[constants.INIC_LINK] = None
10631

    
10632
      if nic_op == constants.DDM_ADD:
10633
        nic_mac = nic_dict.get(constants.INIC_MAC, None)
10634
        if nic_mac is None:
10635
          nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10636

    
10637
      if constants.INIC_MAC in nic_dict:
10638
        nic_mac = nic_dict[constants.INIC_MAC]
10639
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10640
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10641

    
10642
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10643
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10644
                                     " modifying an existing nic",
10645
                                     errors.ECODE_INVAL)
10646

    
10647
    if nic_addremove > 1:
10648
      raise errors.OpPrereqError("Only one NIC add or remove operation"
10649
                                 " supported at a time", errors.ECODE_INVAL)
10650

    
10651
  def ExpandNames(self):
10652
    self._ExpandAndLockInstance()
10653
    self.needed_locks[locking.LEVEL_NODE] = []
10654
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10655

    
10656
  def DeclareLocks(self, level):
10657
    if level == locking.LEVEL_NODE:
10658
      self._LockInstancesNodes()
10659
      if self.op.disk_template and self.op.remote_node:
10660
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10661
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10662

    
10663
  def BuildHooksEnv(self):
10664
    """Build hooks env.
10665

10666
    This runs on the master, primary and secondaries.
10667

10668
    """
10669
    args = dict()
10670
    if constants.BE_MEMORY in self.be_new:
10671
      args["memory"] = self.be_new[constants.BE_MEMORY]
10672
    if constants.BE_VCPUS in self.be_new:
10673
      args["vcpus"] = self.be_new[constants.BE_VCPUS]
10674
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10675
    # information at all.
10676
    if self.op.nics:
10677
      args["nics"] = []
10678
      nic_override = dict(self.op.nics)
10679
      for idx, nic in enumerate(self.instance.nics):
10680
        if idx in nic_override:
10681
          this_nic_override = nic_override[idx]
10682
        else:
10683
          this_nic_override = {}
10684
        if constants.INIC_IP in this_nic_override:
10685
          ip = this_nic_override[constants.INIC_IP]
10686
        else:
10687
          ip = nic.ip
10688
        if constants.INIC_MAC in this_nic_override:
10689
          mac = this_nic_override[constants.INIC_MAC]
10690
        else:
10691
          mac = nic.mac
10692
        if idx in self.nic_pnew:
10693
          nicparams = self.nic_pnew[idx]
10694
        else:
10695
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10696
        mode = nicparams[constants.NIC_MODE]
10697
        link = nicparams[constants.NIC_LINK]
10698
        args["nics"].append((ip, mac, mode, link))
10699
      if constants.DDM_ADD in nic_override:
10700
        ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10701
        mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10702
        nicparams = self.nic_pnew[constants.DDM_ADD]
10703
        mode = nicparams[constants.NIC_MODE]
10704
        link = nicparams[constants.NIC_LINK]
10705
        args["nics"].append((ip, mac, mode, link))
10706
      elif constants.DDM_REMOVE in nic_override:
10707
        del args["nics"][-1]
10708

    
10709
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10710
    if self.op.disk_template:
10711
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10712

    
10713
    return env
10714

    
10715
  def BuildHooksNodes(self):
10716
    """Build hooks nodes.
10717

10718
    """
10719
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10720
    return (nl, nl)
10721

    
10722
  def CheckPrereq(self):
10723
    """Check prerequisites.
10724

10725
    This only checks the instance list against the existing names.
10726

10727
    """
10728
    # checking the new params on the primary/secondary nodes
10729

    
10730
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10731
    cluster = self.cluster = self.cfg.GetClusterInfo()
10732
    assert self.instance is not None, \
10733
      "Cannot retrieve locked instance %s" % self.op.instance_name
10734
    pnode = instance.primary_node
10735
    nodelist = list(instance.all_nodes)
10736

    
10737
    # OS change
10738
    if self.op.os_name and not self.op.force:
10739
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10740
                      self.op.force_variant)
10741
      instance_os = self.op.os_name
10742
    else:
10743
      instance_os = instance.os
10744

    
10745
    if self.op.disk_template:
10746
      if instance.disk_template == self.op.disk_template:
10747
        raise errors.OpPrereqError("Instance already has disk template %s" %
10748
                                   instance.disk_template, errors.ECODE_INVAL)
10749

    
10750
      if (instance.disk_template,
10751
          self.op.disk_template) not in self._DISK_CONVERSIONS:
10752
        raise errors.OpPrereqError("Unsupported disk template conversion from"
10753
                                   " %s to %s" % (instance.disk_template,
10754
                                                  self.op.disk_template),
10755
                                   errors.ECODE_INVAL)
10756
      _CheckInstanceDown(self, instance, "cannot change disk template")
10757
      if self.op.disk_template in constants.DTS_INT_MIRROR:
10758
        if self.op.remote_node == pnode:
10759
          raise errors.OpPrereqError("Given new secondary node %s is the same"
10760
                                     " as the primary node of the instance" %
10761
                                     self.op.remote_node, errors.ECODE_STATE)
10762
        _CheckNodeOnline(self, self.op.remote_node)
10763
        _CheckNodeNotDrained(self, self.op.remote_node)
10764
        # FIXME: here we assume that the old instance type is DT_PLAIN
10765
        assert instance.disk_template == constants.DT_PLAIN
10766
        disks = [{constants.IDISK_SIZE: d.size,
10767
                  constants.IDISK_VG: d.logical_id[0]}
10768
                 for d in instance.disks]
10769
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10770
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10771

    
10772
    # hvparams processing
10773
    if self.op.hvparams:
10774
      hv_type = instance.hypervisor
10775
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10776
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10777
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10778

    
10779
      # local check
10780
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10781
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10782
      self.hv_new = hv_new # the new actual values
10783
      self.hv_inst = i_hvdict # the new dict (without defaults)
10784
    else:
10785
      self.hv_new = self.hv_inst = {}
10786

    
10787
    # beparams processing
10788
    if self.op.beparams:
10789
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10790
                                   use_none=True)
10791
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10792
      be_new = cluster.SimpleFillBE(i_bedict)
10793
      self.be_new = be_new # the new actual values
10794
      self.be_inst = i_bedict # the new dict (without defaults)
10795
    else:
10796
      self.be_new = self.be_inst = {}
10797
    be_old = cluster.FillBE(instance)
10798

    
10799
    # osparams processing
10800
    if self.op.osparams:
10801
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10802
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10803
      self.os_inst = i_osdict # the new dict (without defaults)
10804
    else:
10805
      self.os_inst = {}
10806

    
10807
    self.warn = []
10808

    
10809
    if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10810
        be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10811
      mem_check_list = [pnode]
10812
      if be_new[constants.BE_AUTO_BALANCE]:
10813
        # either we changed auto_balance to yes or it was from before
10814
        mem_check_list.extend(instance.secondary_nodes)
10815
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
10816
                                                  instance.hypervisor)
10817
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10818
                                         instance.hypervisor)
10819
      pninfo = nodeinfo[pnode]
10820
      msg = pninfo.fail_msg
10821
      if msg:
10822
        # Assume the primary node is unreachable and go ahead
10823
        self.warn.append("Can't get info from primary node %s: %s" %
10824
                         (pnode, msg))
10825
      elif not isinstance(pninfo.payload.get("memory_free", None), int):
10826
        self.warn.append("Node data from primary node %s doesn't contain"
10827
                         " free memory information" % pnode)
10828
      elif instance_info.fail_msg:
10829
        self.warn.append("Can't get instance runtime information: %s" %
10830
                        instance_info.fail_msg)
10831
      else:
10832
        if instance_info.payload:
10833
          current_mem = int(instance_info.payload["memory"])
10834
        else:
10835
          # Assume instance not running
10836
          # (there is a slight race condition here, but it's not very probable,
10837
          # and we have no other way to check)
10838
          current_mem = 0
10839
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10840
                    pninfo.payload["memory_free"])
10841
        if miss_mem > 0:
10842
          raise errors.OpPrereqError("This change will prevent the instance"
10843
                                     " from starting, due to %d MB of memory"
10844
                                     " missing on its primary node" % miss_mem,
10845
                                     errors.ECODE_NORES)
10846

    
10847
      if be_new[constants.BE_AUTO_BALANCE]:
10848
        for node, nres in nodeinfo.items():
10849
          if node not in instance.secondary_nodes:
10850
            continue
10851
          nres.Raise("Can't get info from secondary node %s" % node,
10852
                     prereq=True, ecode=errors.ECODE_STATE)
10853
          if not isinstance(nres.payload.get("memory_free", None), int):
10854
            raise errors.OpPrereqError("Secondary node %s didn't return free"
10855
                                       " memory information" % node,
10856
                                       errors.ECODE_STATE)
10857
          elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10858
            raise errors.OpPrereqError("This change will prevent the instance"
10859
                                       " from failover to its secondary node"
10860
                                       " %s, due to not enough memory" % node,
10861
                                       errors.ECODE_STATE)
10862

    
10863
    # NIC processing
10864
    self.nic_pnew = {}
10865
    self.nic_pinst = {}
10866
    for nic_op, nic_dict in self.op.nics:
10867
      if nic_op == constants.DDM_REMOVE:
10868
        if not instance.nics:
10869
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10870
                                     errors.ECODE_INVAL)
10871
        continue
10872
      if nic_op != constants.DDM_ADD:
10873
        # an existing nic
10874
        if not instance.nics:
10875
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10876
                                     " no NICs" % nic_op,
10877
                                     errors.ECODE_INVAL)
10878
        if nic_op < 0 or nic_op >= len(instance.nics):
10879
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10880
                                     " are 0 to %d" %
10881
                                     (nic_op, len(instance.nics) - 1),
10882
                                     errors.ECODE_INVAL)
10883
        old_nic_params = instance.nics[nic_op].nicparams
10884
        old_nic_ip = instance.nics[nic_op].ip
10885
      else:
10886
        old_nic_params = {}
10887
        old_nic_ip = None
10888

    
10889
      update_params_dict = dict([(key, nic_dict[key])
10890
                                 for key in constants.NICS_PARAMETERS
10891
                                 if key in nic_dict])
10892

    
10893
      if "bridge" in nic_dict:
10894
        update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10895

    
10896
      new_nic_params = _GetUpdatedParams(old_nic_params,
10897
                                         update_params_dict)
10898
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10899
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10900
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10901
      self.nic_pinst[nic_op] = new_nic_params
10902
      self.nic_pnew[nic_op] = new_filled_nic_params
10903
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10904

    
10905
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
10906
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10907
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10908
        if msg:
10909
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10910
          if self.op.force:
10911
            self.warn.append(msg)
10912
          else:
10913
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10914
      if new_nic_mode == constants.NIC_MODE_ROUTED:
10915
        if constants.INIC_IP in nic_dict:
10916
          nic_ip = nic_dict[constants.INIC_IP]
10917
        else:
10918
          nic_ip = old_nic_ip
10919
        if nic_ip is None:
10920
          raise errors.OpPrereqError("Cannot set the nic ip to None"
10921
                                     " on a routed nic", errors.ECODE_INVAL)
10922
      if constants.INIC_MAC in nic_dict:
10923
        nic_mac = nic_dict[constants.INIC_MAC]
10924
        if nic_mac is None:
10925
          raise errors.OpPrereqError("Cannot set the nic mac to None",
10926
                                     errors.ECODE_INVAL)
10927
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10928
          # otherwise generate the mac
10929
          nic_dict[constants.INIC_MAC] = \
10930
            self.cfg.GenerateMAC(self.proc.GetECId())
10931
        else:
10932
          # or validate/reserve the current one
10933
          try:
10934
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10935
          except errors.ReservationError:
10936
            raise errors.OpPrereqError("MAC address %s already in use"
10937
                                       " in cluster" % nic_mac,
10938
                                       errors.ECODE_NOTUNIQUE)
10939

    
10940
    # DISK processing
10941
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10942
      raise errors.OpPrereqError("Disk operations not supported for"
10943
                                 " diskless instances",
10944
                                 errors.ECODE_INVAL)
10945
    for disk_op, _ in self.op.disks:
10946
      if disk_op == constants.DDM_REMOVE:
10947
        if len(instance.disks) == 1:
10948
          raise errors.OpPrereqError("Cannot remove the last disk of"
10949
                                     " an instance", errors.ECODE_INVAL)
10950
        _CheckInstanceDown(self, instance, "cannot remove disks")
10951

    
10952
      if (disk_op == constants.DDM_ADD and
10953
          len(instance.disks) >= constants.MAX_DISKS):
10954
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10955
                                   " add more" % constants.MAX_DISKS,
10956
                                   errors.ECODE_STATE)
10957
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10958
        # an existing disk
10959
        if disk_op < 0 or disk_op >= len(instance.disks):
10960
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
10961
                                     " are 0 to %d" %
10962
                                     (disk_op, len(instance.disks) - 1),
10963
                                     errors.ECODE_INVAL)
10964

    
10965
    return
10966
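  # Illustrative sketch of the modification lists validated above (the values
  # are hypothetical, not taken from a real opcode): each entry is an
  # (operation, params) pair, where the operation is either an index into the
  # existing devices or one of DDM_ADD/DDM_REMOVE, e.g.
  #
  #   disks=[(constants.DDM_ADD, {constants.IDISK_SIZE: 1024})]
  #   nics=[(0, {constants.INIC_IP: "192.0.2.10"})]
  #
  # Exec() below walks the same lists in the same order and applies each
  # change.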

    
10967
  def _ConvertPlainToDrbd(self, feedback_fn):
10968
    """Converts an instance from plain to drbd.
10969

10970
    """
10971
    feedback_fn("Converting template to drbd")
10972
    instance = self.instance
10973
    pnode = instance.primary_node
10974
    snode = self.op.remote_node
10975

    
10976
    # create a fake disk info for _GenerateDiskTemplate
10977
    disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10978
                  constants.IDISK_VG: d.logical_id[0]}
10979
                 for d in instance.disks]
10980
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10981
                                      instance.name, pnode, [snode],
10982
                                      disk_info, None, None, 0, feedback_fn)
10983
    info = _GetInstanceInfoText(instance)
10984
    feedback_fn("Creating aditional volumes...")
10985
    # first, create the missing data and meta devices
10986
    for disk in new_disks:
10987
      # unfortunately this is... not too nice
10988
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10989
                            info, True)
10990
      for child in disk.children:
10991
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
10992
    # at this stage, all new LVs have been created, we can rename the
10993
    # old ones
10994
    feedback_fn("Renaming original volumes...")
10995
    rename_list = [(o, n.children[0].logical_id)
10996
                   for (o, n) in zip(instance.disks, new_disks)]
10997
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
10998
    result.Raise("Failed to rename original LVs")
10999

    
11000
    feedback_fn("Initializing DRBD devices...")
11001
    # all child devices are in place, we can now create the DRBD devices
11002
    for disk in new_disks:
11003
      for node in [pnode, snode]:
11004
        f_create = node == pnode
11005
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
11006

    
11007
    # at this point, the instance has been modified
11008
    instance.disk_template = constants.DT_DRBD8
11009
    instance.disks = new_disks
11010
    self.cfg.Update(instance, feedback_fn)
11011

    
11012
    # disks are created, waiting for sync
11013
    disk_abort = not _WaitForSync(self, instance,
11014
                                  oneshot=not self.op.wait_for_sync)
11015
    if disk_abort:
11016
      raise errors.OpExecError("There are some degraded disks for"
11017
                               " this instance, please cleanup manually")
11018
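  # Note on the disk layout both converters rely on: a DRBD8 disk object, as
  # built by _GenerateDiskTemplate, has two LV children, children[0] holding
  # the data and children[1] the DRBD metadata.  Rough sketch (the LV names
  # are illustrative only):
  #
  #   drbd8 disk
  #     +- children[0]  data LV      e.g. "<uuid>.disk0_data"
  #     +- children[1]  metadata LV  e.g. "<uuid>.disk0_meta"
  #
  # _ConvertPlainToDrbd above renames the existing plain LV into the data
  # slot and creates the metadata LV, while _ConvertDrbdToPlain below keeps
  # children[0] and discards the rest.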

    
11019
  def _ConvertDrbdToPlain(self, feedback_fn):
11020
    """Converts an instance from drbd to plain.
11021

11022
    """
11023
    instance = self.instance
11024
    assert len(instance.secondary_nodes) == 1
11025
    pnode = instance.primary_node
11026
    snode = instance.secondary_nodes[0]
11027
    feedback_fn("Converting template to plain")
11028

    
11029
    old_disks = instance.disks
11030
    new_disks = [d.children[0] for d in old_disks]
11031

    
11032
    # copy over size and mode
11033
    for parent, child in zip(old_disks, new_disks):
11034
      child.size = parent.size
11035
      child.mode = parent.mode
11036

    
11037
    # update instance structure
11038
    instance.disks = new_disks
11039
    instance.disk_template = constants.DT_PLAIN
11040
    self.cfg.Update(instance, feedback_fn)
11041

    
11042
    feedback_fn("Removing volumes on the secondary node...")
11043
    for disk in old_disks:
11044
      self.cfg.SetDiskID(disk, snode)
11045
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
11046
      if msg:
11047
        self.LogWarning("Could not remove block device %s on node %s,"
11048
                        " continuing anyway: %s", disk.iv_name, snode, msg)
11049

    
11050
    feedback_fn("Removing unneeded volumes on the primary node...")
11051
    for idx, disk in enumerate(old_disks):
11052
      meta = disk.children[1]
11053
      self.cfg.SetDiskID(meta, pnode)
11054
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
11055
      if msg:
11056
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
11057
                        " continuing anyway: %s", idx, pnode, msg)
11058
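  # Both converters are invoked only from Exec() below, after the instance
  # disks have been shut down; the dispatch is a plain table lookup (see
  # _DISK_CONVERSIONS at the end of the class):
  #
  #   mode = (instance.disk_template, self.op.disk_template)
  #   self._DISK_CONVERSIONS[mode](self, feedback_fn)
  #
  # so only the plain<->drbd8 conversions are currently supported.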

    
11059
  def Exec(self, feedback_fn):
11060
    """Modifies an instance.
11061

11062
    All parameters take effect only at the next restart of the instance.
11063

11064
    """
11065
    # Process here the warnings from CheckPrereq, as we don't have a
11066
    # feedback_fn there.
11067
    for warn in self.warn:
11068
      feedback_fn("WARNING: %s" % warn)
11069

    
11070
    result = []
11071
    instance = self.instance
11072
    # disk changes
11073
    for disk_op, disk_dict in self.op.disks:
11074
      if disk_op == constants.DDM_REMOVE:
11075
        # remove the last disk
11076
        device = instance.disks.pop()
11077
        device_idx = len(instance.disks)
11078
        for node, disk in device.ComputeNodeTree(instance.primary_node):
11079
          self.cfg.SetDiskID(disk, node)
11080
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11081
          if msg:
11082
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
11083
                            " continuing anyway", device_idx, node, msg)
11084
        result.append(("disk/%d" % device_idx, "remove"))
11085
      elif disk_op == constants.DDM_ADD:
11086
        # add a new disk
11087
        if instance.disk_template in (constants.DT_FILE,
11088
                                        constants.DT_SHARED_FILE):
11089
          file_driver, file_path = instance.disks[0].logical_id
11090
          file_path = os.path.dirname(file_path)
11091
        else:
11092
          file_driver = file_path = None
11093
        disk_idx_base = len(instance.disks)
11094
        new_disk = _GenerateDiskTemplate(self,
11095
                                         instance.disk_template,
11096
                                         instance.name, instance.primary_node,
11097
                                         instance.secondary_nodes,
11098
                                         [disk_dict],
11099
                                         file_path,
11100
                                         file_driver,
11101
                                         disk_idx_base, feedback_fn)[0]
11102
        instance.disks.append(new_disk)
11103
        info = _GetInstanceInfoText(instance)
11104

    
11105
        logging.info("Creating volume %s for instance %s",
11106
                     new_disk.iv_name, instance.name)
11107
        # Note: this needs to be kept in sync with _CreateDisks
11108
        #HARDCODE
11109
        for node in instance.all_nodes:
11110
          f_create = node == instance.primary_node
11111
          try:
11112
            _CreateBlockDev(self, node, instance, new_disk,
11113
                            f_create, info, f_create)
11114
          except errors.OpExecError, err:
11115
            self.LogWarning("Failed to create volume %s (%s) on"
11116
                            " node %s: %s",
11117
                            new_disk.iv_name, new_disk, node, err)
11118
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11119
                       (new_disk.size, new_disk.mode)))
11120
      else:
11121
        # change a given disk
11122
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11123
        result.append(("disk.mode/%d" % disk_op,
11124
                       disk_dict[constants.IDISK_MODE]))
11125

    
11126
    if self.op.disk_template:
11127
      r_shut = _ShutdownInstanceDisks(self, instance)
11128
      if not r_shut:
11129
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11130
                                 " proceed with disk template conversion")
11131
      mode = (instance.disk_template, self.op.disk_template)
11132
      try:
11133
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
11134
      except:
11135
        self.cfg.ReleaseDRBDMinors(instance.name)
11136
        raise
11137
      result.append(("disk_template", self.op.disk_template))
11138

    
11139
    # NIC changes
11140
    for nic_op, nic_dict in self.op.nics:
11141
      if nic_op == constants.DDM_REMOVE:
11142
        # remove the last nic
11143
        del instance.nics[-1]
11144
        result.append(("nic.%d" % len(instance.nics), "remove"))
11145
      elif nic_op == constants.DDM_ADD:
11146
        # mac and bridge should be set, by now
11147
        mac = nic_dict[constants.INIC_MAC]
11148
        ip = nic_dict.get(constants.INIC_IP, None)
11149
        nicparams = self.nic_pinst[constants.DDM_ADD]
11150
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11151
        instance.nics.append(new_nic)
11152
        result.append(("nic.%d" % (len(instance.nics) - 1),
11153
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
11154
                       (new_nic.mac, new_nic.ip,
11155
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11156
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11157
                       )))
11158
      else:
11159
        for key in (constants.INIC_MAC, constants.INIC_IP):
11160
          if key in nic_dict:
11161
            setattr(instance.nics[nic_op], key, nic_dict[key])
11162
        if nic_op in self.nic_pinst:
11163
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11164
        for key, val in nic_dict.iteritems():
11165
          result.append(("nic.%s/%d" % (key, nic_op), val))
11166

    
11167
    # hvparams changes
11168
    if self.op.hvparams:
11169
      instance.hvparams = self.hv_inst
11170
      for key, val in self.op.hvparams.iteritems():
11171
        result.append(("hv/%s" % key, val))
11172

    
11173
    # beparams changes
11174
    if self.op.beparams:
11175
      instance.beparams = self.be_inst
11176
      for key, val in self.op.beparams.iteritems():
11177
        result.append(("be/%s" % key, val))
11178

    
11179
    # OS change
11180
    if self.op.os_name:
11181
      instance.os = self.op.os_name
11182

    
11183
    # osparams changes
11184
    if self.op.osparams:
11185
      instance.osparams = self.os_inst
11186
      for key, val in self.op.osparams.iteritems():
11187
        result.append(("os/%s" % key, val))
11188

    
11189
    self.cfg.Update(instance, feedback_fn)
11190

    
11191
    return result
11192
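  # For reference, Exec() returns a list of (name, new_value) pairs that the
  # caller uses as a change report; an illustrative (hypothetical) result:
  #
  #   [("disk/1", "add:size=1024,mode=rw"),
  #    ("nic.ip/0", "192.0.2.10"),
  #    ("be/memory", 512),
  #    ("disk_template", "drbd")]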

    
11193
  _DISK_CONVERSIONS = {
11194
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11195
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11196
    }
11197

    
11198

    
11199
class LUInstanceChangeGroup(LogicalUnit):
11200
  HPATH = "instance-change-group"
11201
  HTYPE = constants.HTYPE_INSTANCE
11202
  REQ_BGL = False
11203

    
11204
  def ExpandNames(self):
11205
    self.share_locks = _ShareAll()
11206
    self.needed_locks = {
11207
      locking.LEVEL_NODEGROUP: [],
11208
      locking.LEVEL_NODE: [],
11209
      }
11210

    
11211
    self._ExpandAndLockInstance()
11212

    
11213
    if self.op.target_groups:
11214
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
11215
                                  self.op.target_groups)
11216
    else:
11217
      self.req_target_uuids = None
11218

    
11219
    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
11220

    
11221
  def DeclareLocks(self, level):
11222
    if level == locking.LEVEL_NODEGROUP:
11223
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]
11224

    
11225
      if self.req_target_uuids:
11226
        lock_groups = set(self.req_target_uuids)
11227

    
11228
        # Lock all groups used by instance optimistically; this requires going
11229
        # via the node before it's locked, requiring verification later on
11230
        instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
11231
        lock_groups.update(instance_groups)
11232
      else:
11233
        # No target groups, need to lock all of them
11234
        lock_groups = locking.ALL_SET
11235

    
11236
      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
11237

    
11238
    elif level == locking.LEVEL_NODE:
11239
      if self.req_target_uuids:
11240
        # Lock all nodes used by instances
11241
        self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
11242
        self._LockInstancesNodes()
11243

    
11244
        # Lock all nodes in all potential target groups
11245
        lock_groups = (frozenset(self.owned_locks(locking.LEVEL_NODEGROUP)) -
11246
                       self.cfg.GetInstanceNodeGroups(self.op.instance_name))
11247
        member_nodes = [node_name
11248
                        for group in lock_groups
11249
                        for node_name in self.cfg.GetNodeGroup(group).members]
11250
        self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
11251
      else:
11252
        # Lock all nodes as all groups are potential targets
11253
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11254

    
11255
  def CheckPrereq(self):
11256
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
11257
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
11258
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))
11259

    
11260
    assert (self.req_target_uuids is None or
11261
            owned_groups.issuperset(self.req_target_uuids))
11262
    assert owned_instances == set([self.op.instance_name])
11263

    
11264
    # Get instance information
11265
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
11266

    
11267
    # Check if node groups for locked instance are still correct
11268
    assert owned_nodes.issuperset(self.instance.all_nodes), \
11269
      ("Instance %s's nodes changed while we kept the lock" %
11270
       self.op.instance_name)
11271

    
11272
    inst_groups = _CheckInstanceNodeGroups(self.cfg, self.op.instance_name,
11273
                                           owned_groups)
11274

    
11275
    if self.req_target_uuids:
11276
      # User requested specific target groups
11277
      self.target_uuids = self.req_target_uuids
11278
    else:
11279
      # All groups except those used by the instance are potential targets
11280
      self.target_uuids = owned_groups - inst_groups
11281

    
11282
    conflicting_groups = self.target_uuids & inst_groups
11283
    if conflicting_groups:
11284
      raise errors.OpPrereqError("Can't use group(s) '%s' as targets, they are"
11285
                                 " used by the instance '%s'" %
11286
                                 (utils.CommaJoin(conflicting_groups),
11287
                                  self.op.instance_name),
11288
                                 errors.ECODE_INVAL)
11289

    
11290
    if not self.target_uuids:
11291
      raise errors.OpPrereqError("There are no possible target groups",
11292
                                 errors.ECODE_INVAL)
11293

    
11294
  def BuildHooksEnv(self):
11295
    """Build hooks env.
11296

11297
    """
11298
    assert self.target_uuids
11299

    
11300
    env = {
11301
      "TARGET_GROUPS": " ".join(self.target_uuids),
11302
      }
11303

    
11304
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11305

    
11306
    return env
11307

    
11308
  def BuildHooksNodes(self):
11309
    """Build hooks nodes.
11310

11311
    """
11312
    mn = self.cfg.GetMasterNode()
11313
    return ([mn], [mn])
11314

    
11315
  def Exec(self, feedback_fn):
11316
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))
11317

    
11318
    assert instances == [self.op.instance_name], "Instance not locked"
11319

    
11320
    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
11321
                     instances=instances, target_groups=list(self.target_uuids))
11322

    
11323
    ial.Run(self.op.iallocator)
11324

    
11325
    if not ial.success:
11326
      raise errors.OpPrereqError("Can't compute solution for changing group of"
11327
                                 " instance '%s' using iallocator '%s': %s" %
11328
                                 (self.op.instance_name, self.op.iallocator,
11329
                                  ial.info),
11330
                                 errors.ECODE_NORES)
11331

    
11332
    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
11333

    
11334
    self.LogInfo("Iallocator returned %s job(s) for changing group of"
11335
                 " instance '%s'", len(jobs), self.op.instance_name)
11336

    
11337
    return ResultWithJobs(jobs)
11338
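  # Illustrative client-side usage (hypothetical values; the opcode fields
  # mirror the op attributes used above):
  #
  #   op = opcodes.OpInstanceChangeGroup(instance_name="inst1.example.com",
  #                                      target_groups=["group2"],
  #                                      iallocator="hail")
  #
  # The LU itself does not move anything: the iallocator's CHG_GROUP solution
  # is turned into jobs by _LoadNodeEvacResult and handed back via
  # ResultWithJobs, so the actual relocation happens in separately submitted
  # jobs.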

    
11339

    
11340
class LUBackupQuery(NoHooksLU):
11341
  """Query the exports list
11342

11343
  """
11344
  REQ_BGL = False
11345

    
11346
  def ExpandNames(self):
11347
    self.needed_locks = {}
11348
    self.share_locks[locking.LEVEL_NODE] = 1
11349
    if not self.op.nodes:
11350
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11351
    else:
11352
      self.needed_locks[locking.LEVEL_NODE] = \
11353
        _GetWantedNodes(self, self.op.nodes)
11354

    
11355
  def Exec(self, feedback_fn):
11356
    """Compute the list of all the exported system images.
11357

11358
    @rtype: dict
11359
    @return: a dictionary with the structure node->(export-list)
11360
        where export-list is a list of the instances exported on
11361
        that node.
11362

11363
    """
11364
    self.nodes = self.owned_locks(locking.LEVEL_NODE)
11365
    rpcresult = self.rpc.call_export_list(self.nodes)
11366
    result = {}
11367
    for node in rpcresult:
11368
      if rpcresult[node].fail_msg:
11369
        result[node] = False
11370
      else:
11371
        result[node] = rpcresult[node].payload
11372

    
11373
    return result
11374
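  # Shape of the returned dictionary (node and instance names are
  # hypothetical); nodes whose export list could not be fetched map to False
  # instead of a list:
  #
  #   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
  #    "node2.example.com": False}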

    
11375

    
11376
class LUBackupPrepare(NoHooksLU):
11377
  """Prepares an instance for an export and returns useful information.
11378

11379
  """
11380
  REQ_BGL = False
11381

    
11382
  def ExpandNames(self):
11383
    self._ExpandAndLockInstance()
11384

    
11385
  def CheckPrereq(self):
11386
    """Check prerequisites.
11387

11388
    """
11389
    instance_name = self.op.instance_name
11390

    
11391
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11392
    assert self.instance is not None, \
11393
          "Cannot retrieve locked instance %s" % self.op.instance_name
11394
    _CheckNodeOnline(self, self.instance.primary_node)
11395

    
11396
    self._cds = _GetClusterDomainSecret()
11397

    
11398
  def Exec(self, feedback_fn):
11399
    """Prepares an instance for an export.
11400

11401
    """
11402
    instance = self.instance
11403

    
11404
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11405
      salt = utils.GenerateSecret(8)
11406

    
11407
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11408
      result = self.rpc.call_x509_cert_create(instance.primary_node,
11409
                                              constants.RIE_CERT_VALIDITY)
11410
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
11411

    
11412
      (name, cert_pem) = result.payload
11413

    
11414
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11415
                                             cert_pem)
11416

    
11417
      return {
11418
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11419
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11420
                          salt),
11421
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11422
        }
11423

    
11424
    return None
11425
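  # For EXPORT_MODE_REMOTE the caller receives roughly the following
  # structure (illustrative shape, values elided):
  #
  #   {"handshake": <handshake derived from the cluster domain secret>,
  #    "x509_key_name": (name, hmac_of_name, salt),
  #    "x509_ca": <signed PEM dump of the freshly created CA certificate>}
  #
  # while local exports need no preparation and simply get None.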

    
11426

    
11427
class LUBackupExport(LogicalUnit):
11428
  """Export an instance to an image in the cluster.
11429

11430
  """
11431
  HPATH = "instance-export"
11432
  HTYPE = constants.HTYPE_INSTANCE
11433
  REQ_BGL = False
11434

    
11435
  def CheckArguments(self):
11436
    """Check the arguments.
11437

11438
    """
11439
    self.x509_key_name = self.op.x509_key_name
11440
    self.dest_x509_ca_pem = self.op.destination_x509_ca
11441

    
11442
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
11443
      if not self.x509_key_name:
11444
        raise errors.OpPrereqError("Missing X509 key name for encryption",
11445
                                   errors.ECODE_INVAL)
11446

    
11447
      if not self.dest_x509_ca_pem:
11448
        raise errors.OpPrereqError("Missing destination X509 CA",
11449
                                   errors.ECODE_INVAL)
11450

    
11451
  def ExpandNames(self):
11452
    self._ExpandAndLockInstance()
11453

    
11454
    # Lock all nodes for local exports
11455
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11456
      # FIXME: lock only instance primary and destination node
11457
      #
11458
      # Sad but true, for now we have to lock all nodes, as we don't know where
11459
      # the previous export might be, and in this LU we search for it and
11460
      # remove it from its current node. In the future we could fix this by:
11461
      #  - making a tasklet to search (share-lock all), then create the
11462
      #    new one, then one to remove, after
11463
      #  - removing the removal operation altogether
11464
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11465

    
11466
  def DeclareLocks(self, level):
11467
    """Last minute lock declaration."""
11468
    # All nodes are locked anyway, so nothing to do here.
11469

    
11470
  def BuildHooksEnv(self):
11471
    """Build hooks env.
11472

11473
    This will run on the master, primary node and target node.
11474

11475
    """
11476
    env = {
11477
      "EXPORT_MODE": self.op.mode,
11478
      "EXPORT_NODE": self.op.target_node,
11479
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11480
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11481
      # TODO: Generic function for boolean env variables
11482
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11483
      }
11484

    
11485
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11486

    
11487
    return env
11488

    
11489
  def BuildHooksNodes(self):
11490
    """Build hooks nodes.
11491

11492
    """
11493
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11494

    
11495
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11496
      nl.append(self.op.target_node)
11497

    
11498
    return (nl, nl)
11499

    
11500
  def CheckPrereq(self):
11501
    """Check prerequisites.
11502

11503
    This checks that the instance and node names are valid.
11504

11505
    """
11506
    instance_name = self.op.instance_name
11507

    
11508
    self.instance = self.cfg.GetInstanceInfo(instance_name)
11509
    assert self.instance is not None, \
11510
          "Cannot retrieve locked instance %s" % self.op.instance_name
11511
    _CheckNodeOnline(self, self.instance.primary_node)
11512

    
11513
    if (self.op.remove_instance and self.instance.admin_up and
11514
        not self.op.shutdown):
11515
      raise errors.OpPrereqError("Can not remove instance without shutting it"
11516
                                 " down before")
11517

    
11518
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11519
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11520
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11521
      assert self.dst_node is not None
11522

    
11523
      _CheckNodeOnline(self, self.dst_node.name)
11524
      _CheckNodeNotDrained(self, self.dst_node.name)
11525

    
11526
      self._cds = None
11527
      self.dest_disk_info = None
11528
      self.dest_x509_ca = None
11529

    
11530
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11531
      self.dst_node = None
11532

    
11533
      if len(self.op.target_node) != len(self.instance.disks):
11534
        raise errors.OpPrereqError(("Received destination information for %s"
11535
                                    " disks, but instance %s has %s disks") %
11536
                                   (len(self.op.target_node), instance_name,
11537
                                    len(self.instance.disks)),
11538
                                   errors.ECODE_INVAL)
11539

    
11540
      cds = _GetClusterDomainSecret()
11541

    
11542
      # Check X509 key name
11543
      try:
11544
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11545
      except (TypeError, ValueError), err:
11546
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11547

    
11548
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11549
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11550
                                   errors.ECODE_INVAL)
11551

    
11552
      # Load and verify CA
11553
      try:
11554
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11555
      except OpenSSL.crypto.Error, err:
11556
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11557
                                   (err, ), errors.ECODE_INVAL)
11558

    
11559
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11560
      if errcode is not None:
11561
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11562
                                   (msg, ), errors.ECODE_INVAL)
11563

    
11564
      self.dest_x509_ca = cert
11565

    
11566
      # Verify target information
11567
      disk_info = []
11568
      for idx, disk_data in enumerate(self.op.target_node):
11569
        try:
11570
          (host, port, magic) = \
11571
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11572
        except errors.GenericError, err:
11573
          raise errors.OpPrereqError("Target info for disk %s: %s" %
11574
                                     (idx, err), errors.ECODE_INVAL)
11575

    
11576
        disk_info.append((host, port, magic))
11577

    
11578
      assert len(disk_info) == len(self.op.target_node)
11579
      self.dest_disk_info = disk_info
11580

    
11581
    else:
11582
      raise errors.ProgrammerError("Unhandled export mode %r" %
11583
                                   self.op.mode)
11584

    
11585
    # instance disk type verification
11586
    # TODO: Implement export support for file-based disks
11587
    for disk in self.instance.disks:
11588
      if disk.dev_type == constants.LD_FILE:
11589
        raise errors.OpPrereqError("Export not supported for instances with"
11590
                                   " file-based disks", errors.ECODE_INVAL)
11591
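  # Note that self.op.target_node is overloaded depending on the export mode
  # checked above (example values are hypothetical):
  #
  #   EXPORT_MODE_LOCAL:  a single node name, e.g. "node3.example.com"
  #   EXPORT_MODE_REMOTE: one opaque entry per instance disk, verified by
  #                       CheckRemoteExportDiskInfo() into (host, port, magic)
  #                       tuples, e.g. ("198.51.100.7", 11000, "x17magic")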

    
11592
  def _CleanupExports(self, feedback_fn):
11593
    """Removes exports of current instance from all other nodes.
11594

11595
    If an instance in a cluster with nodes A..D was exported to node C, its
11596
    exports will be removed from the nodes A, B and D.
11597

11598
    """
11599
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
11600

    
11601
    nodelist = self.cfg.GetNodeList()
11602
    nodelist.remove(self.dst_node.name)
11603

    
11604
    # On one-node clusters nodelist will be empty after the removal; if we
11605
    # proceeded, the backup would be removed because OpBackupQuery
11606
    # substitutes an empty list with the full cluster node list.
11607
    iname = self.instance.name
11608
    if nodelist:
11609
      feedback_fn("Removing old exports for instance %s" % iname)
11610
      exportlist = self.rpc.call_export_list(nodelist)
11611
      for node in exportlist:
11612
        if exportlist[node].fail_msg:
11613
          continue
11614
        if iname in exportlist[node].payload:
11615
          msg = self.rpc.call_export_remove(node, iname).fail_msg
11616
          if msg:
11617
            self.LogWarning("Could not remove older export for instance %s"
11618
                            " on node %s: %s", iname, node, msg)
11619

    
11620
  def Exec(self, feedback_fn):
11621
    """Export an instance to an image in the cluster.
11622

11623
    """
11624
    assert self.op.mode in constants.EXPORT_MODES
11625

    
11626
    instance = self.instance
11627
    src_node = instance.primary_node
11628

    
11629
    if self.op.shutdown:
11630
      # shutdown the instance, but not the disks
11631
      feedback_fn("Shutting down instance %s" % instance.name)
11632
      result = self.rpc.call_instance_shutdown(src_node, instance,
11633
                                               self.op.shutdown_timeout)
11634
      # TODO: Maybe ignore failures if ignore_remove_failures is set
11635
      result.Raise("Could not shutdown instance %s on"
11636
                   " node %s" % (instance.name, src_node))
11637

    
11638
    # set the disks ID correctly since call_instance_start needs the
11639
    # correct drbd minor to create the symlinks
11640
    for disk in instance.disks:
11641
      self.cfg.SetDiskID(disk, src_node)
11642

    
11643
    activate_disks = (not instance.admin_up)
11644

    
11645
    if activate_disks:
11646
      # Activate the instance disks if we're exporting a stopped instance
11647
      feedback_fn("Activating disks for %s" % instance.name)
11648
      _StartInstanceDisks(self, instance, None)
11649

    
11650
    try:
11651
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11652
                                                     instance)
11653

    
11654
      helper.CreateSnapshots()
11655
      try:
11656
        if (self.op.shutdown and instance.admin_up and
11657
            not self.op.remove_instance):
11658
          assert not activate_disks
11659
          feedback_fn("Starting instance %s" % instance.name)
11660
          result = self.rpc.call_instance_start(src_node, instance,
11661
                                                None, None, False)
11662
          msg = result.fail_msg
11663
          if msg:
11664
            feedback_fn("Failed to start instance: %s" % msg)
11665
            _ShutdownInstanceDisks(self, instance)
11666
            raise errors.OpExecError("Could not start instance: %s" % msg)
11667

    
11668
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
11669
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11670
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11671
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
11672
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11673

    
11674
          (key_name, _, _) = self.x509_key_name
11675

    
11676
          dest_ca_pem = \
11677
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11678
                                            self.dest_x509_ca)
11679

    
11680
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11681
                                                     key_name, dest_ca_pem,
11682
                                                     timeouts)
11683
      finally:
11684
        helper.Cleanup()
11685

    
11686
      # Check for backwards compatibility
11687
      assert len(dresults) == len(instance.disks)
11688
      assert compat.all(isinstance(i, bool) for i in dresults), \
11689
             "Not all results are boolean: %r" % dresults
11690

    
11691
    finally:
11692
      if activate_disks:
11693
        feedback_fn("Deactivating disks for %s" % instance.name)
11694
        _ShutdownInstanceDisks(self, instance)
11695

    
11696
    if not (compat.all(dresults) and fin_resu):
11697
      failures = []
11698
      if not fin_resu:
11699
        failures.append("export finalization")
11700
      if not compat.all(dresults):
11701
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11702
                               if not dsk)
11703
        failures.append("disk export: disk(s) %s" % fdsk)
11704

    
11705
      raise errors.OpExecError("Export failed, errors in %s" %
11706
                               utils.CommaJoin(failures))
11707

    
11708
    # At this point, the export was successful, we can cleanup/finish
11709

    
11710
    # Remove instance if requested
11711
    if self.op.remove_instance:
11712
      feedback_fn("Removing instance %s" % instance.name)
11713
      _RemoveInstance(self, feedback_fn, instance,
11714
                      self.op.ignore_remove_failures)
11715

    
11716
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
11717
      self._CleanupExports(feedback_fn)
11718

    
11719
    return fin_resu, dresults
11720
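  # Exec() returns (fin_resu, dresults): fin_resu is the boolean outcome of
  # the finalization step and dresults holds one boolean per instance disk.
  # An illustrative successful result for a two-disk instance:
  #
  #   (True, [True, True])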

    
11721

    
11722
class LUBackupRemove(NoHooksLU):
11723
  """Remove exports related to the named instance.
11724

11725
  """
11726
  REQ_BGL = False
11727

    
11728
  def ExpandNames(self):
11729
    self.needed_locks = {}
11730
    # We need all nodes to be locked in order for RemoveExport to work, but we
11731
    # don't need to lock the instance itself, as nothing will happen to it (and
11732
    # we can remove exports also for a removed instance)
11733
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11734

    
11735
  def Exec(self, feedback_fn):
11736
    """Remove any export.
11737

11738
    """
11739
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11740
    # If the instance was not found we'll try with the name that was passed in.
11741
    # This will only work if it was an FQDN, though.
11742
    fqdn_warn = False
11743
    if not instance_name:
11744
      fqdn_warn = True
11745
      instance_name = self.op.instance_name
11746

    
11747
    locked_nodes = self.owned_locks(locking.LEVEL_NODE)
11748
    exportlist = self.rpc.call_export_list(locked_nodes)
11749
    found = False
11750
    for node in exportlist:
11751
      msg = exportlist[node].fail_msg
11752
      if msg:
11753
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11754
        continue
11755
      if instance_name in exportlist[node].payload:
11756
        found = True
11757
        result = self.rpc.call_export_remove(node, instance_name)
11758
        msg = result.fail_msg
11759
        if msg:
11760
          logging.error("Could not remove export for instance %s"
11761
                        " on node %s: %s", instance_name, node, msg)
11762

    
11763
    if fqdn_warn and not found:
11764
      feedback_fn("Export not found. If trying to remove an export belonging"
11765
                  " to a deleted instance please use its Fully Qualified"
11766
                  " Domain Name.")
11767
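  # ExpandInstanceName() above returns None for instances that are no longer
  # in the configuration, so removing exports of an already-deleted instance
  # only works when the stored FQDN is passed verbatim.  Illustrative CLI
  # calls (hypothetical names):
  #
  #   gnt-backup remove inst1.example.com   # matches the stored export name
  #   gnt-backup remove inst1               # finds nothing unless "inst1"
  #                                         # happens to be the stored name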

    
11768

    
11769
class LUGroupAdd(LogicalUnit):
11770
  """Logical unit for creating node groups.
11771

11772
  """
11773
  HPATH = "group-add"
11774
  HTYPE = constants.HTYPE_GROUP
11775
  REQ_BGL = False
11776

    
11777
  def ExpandNames(self):
11778
    # We need the new group's UUID here so that we can create and acquire the
11779
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11780
    # that it should not check whether the UUID exists in the configuration.
11781
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11782
    self.needed_locks = {}
11783
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11784

    
11785
  def CheckPrereq(self):
11786
    """Check prerequisites.
11787

11788
    This checks that the given group name is not an existing node group
11789
    already.
11790

11791
    """
11792
    try:
11793
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11794
    except errors.OpPrereqError:
11795
      pass
11796
    else:
11797
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11798
                                 " node group (UUID: %s)" %
11799
                                 (self.op.group_name, existing_uuid),
11800
                                 errors.ECODE_EXISTS)
11801

    
11802
    if self.op.ndparams:
11803
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11804

    
11805
  def BuildHooksEnv(self):
11806
    """Build hooks env.
11807

11808
    """
11809
    return {
11810
      "GROUP_NAME": self.op.group_name,
11811
      }
11812

    
11813
  def BuildHooksNodes(self):
11814
    """Build hooks nodes.
11815

11816
    """
11817
    mn = self.cfg.GetMasterNode()
11818
    return ([mn], [mn])
11819

    
11820
  def Exec(self, feedback_fn):
11821
    """Add the node group to the cluster.
11822

11823
    """
11824
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11825
                                  uuid=self.group_uuid,
11826
                                  alloc_policy=self.op.alloc_policy,
11827
                                  ndparams=self.op.ndparams)
11828

    
11829
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11830
    del self.remove_locks[locking.LEVEL_NODEGROUP]
11831
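  # The locking above follows the usual pattern for "creation" LUs (a sketch
  # of the intent, not extra behaviour):
  #
  #   ExpandNames: self.add_locks[locking.LEVEL_NODEGROUP] = <new uuid>
  #   Exec:        del self.remove_locks[locking.LEVEL_NODEGROUP]
  #
  # i.e. the UUID is generated early so a lock for it can be registered
  # before the group exists, and dropping the remove_locks entry prevents
  # that freshly added lock from being removed again now that the group is
  # part of the configuration.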

    
11832

    
11833
class LUGroupAssignNodes(NoHooksLU):
11834
  """Logical unit for assigning nodes to groups.
11835

11836
  """
11837
  REQ_BGL = False
11838

    
11839
  def ExpandNames(self):
11840
    # These raise errors.OpPrereqError on their own:
11841
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11842
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11843

    
11844
    # We want to lock all the affected nodes and groups. We have readily
11845
    # available the list of nodes, and the *destination* group. To gather the
11846
    # list of "source" groups, we need to fetch node information later on.
11847
    self.needed_locks = {
11848
      locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11849
      locking.LEVEL_NODE: self.op.nodes,
11850
      }
11851

    
11852
  def DeclareLocks(self, level):
11853
    if level == locking.LEVEL_NODEGROUP:
11854
      assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11855

    
11856
      # Try to get all affected nodes' groups without having the group or node
11857
      # lock yet. Needs verification later in the code flow.
11858
      groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11859

    
11860
      self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11861

    
11862
  def CheckPrereq(self):
11863
    """Check prerequisites.
11864

11865
    """
11866
    assert self.needed_locks[locking.LEVEL_NODEGROUP]
11867
    assert (frozenset(self.owned_locks(locking.LEVEL_NODE)) ==
11868
            frozenset(self.op.nodes))
11869

    
11870
    expected_locks = (set([self.group_uuid]) |
11871
                      self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11872
    actual_locks = self.owned_locks(locking.LEVEL_NODEGROUP)
11873
    if actual_locks != expected_locks:
11874
      raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11875
                               " current groups are '%s', used to be '%s'" %
11876
                               (utils.CommaJoin(expected_locks),
11877
                                utils.CommaJoin(actual_locks)))
11878

    
11879
    self.node_data = self.cfg.GetAllNodesInfo()
11880
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
11881
    instance_data = self.cfg.GetAllInstancesInfo()
11882

    
11883
    if self.group is None:
11884
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11885
                               (self.op.group_name, self.group_uuid))
11886

    
11887
    (new_splits, previous_splits) = \
11888
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11889
                                             for node in self.op.nodes],
11890
                                            self.node_data, instance_data)
11891

    
11892
    if new_splits:
11893
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11894

    
11895
      if not self.op.force:
11896
        raise errors.OpExecError("The following instances get split by this"
11897
                                 " change and --force was not given: %s" %
11898
                                 fmt_new_splits)
11899
      else:
11900
        self.LogWarning("This operation will split the following instances: %s",
11901
                        fmt_new_splits)
11902

    
11903
        if previous_splits:
11904
          self.LogWarning("In addition, these already-split instances continue"
11905
                          " to be split across groups: %s",
11906
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
11907

    
11908
  def Exec(self, feedback_fn):
11909
    """Assign nodes to a new group.
11910

11911
    """
11912
    for node in self.op.nodes:
11913
      self.node_data[node].group = self.group_uuid
11914

    
11915
    # FIXME: Depends on side-effects of modifying the result of
11916
    # C{cfg.GetAllNodesInfo}
11917

    
11918
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11919

    
11920
  @staticmethod
11921
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11922
    """Check for split instances after a node assignment.
11923

11924
    This method considers a series of node assignments as an atomic operation,
11925
    and returns information about split instances after applying the set of
11926
    changes.
11927

11928
    In particular, it returns information about newly split instances, and
11929
    instances that were already split, and remain so after the change.
11930

11931
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11932
    considered.
11933

11934
    @type changes: list of (node_name, new_group_uuid) pairs.
11935
    @param changes: list of node assignments to consider.
11936
    @param node_data: a dict with data for all nodes
11937
    @param instance_data: a dict with all instances to consider
11938
    @rtype: a two-tuple
11939
    @return: a list of instances that were previously okay and result split as a
11940
      consequence of this change, and a list of instances that were previously
11941
      split and this change does not fix.
11942

11943
    """
11944
    changed_nodes = dict((node, group) for node, group in changes
11945
                         if node_data[node].group != group)
11946

    
11947
    all_split_instances = set()
11948
    previously_split_instances = set()
11949

    
11950
    def InstanceNodes(instance):
11951
      return [instance.primary_node] + list(instance.secondary_nodes)
11952

    
11953
    for inst in instance_data.values():
11954
      if inst.disk_template not in constants.DTS_INT_MIRROR:
11955
        continue
11956

    
11957
      instance_nodes = InstanceNodes(inst)
11958

    
11959
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
11960
        previously_split_instances.add(inst.name)
11961

    
11962
      if len(set(changed_nodes.get(node, node_data[node].group)
11963
                 for node in instance_nodes)) > 1:
11964
        all_split_instances.add(inst.name)
11965

    
11966
    return (list(all_split_instances - previously_split_instances),
11967
            list(previously_split_instances & all_split_instances))
11968
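  # Worked example with hypothetical data: nodes A and B start in group g1,
  # node C in group g2, and instance "inst1" uses nodes (A, B).  Calling
  #
  #   CheckAssignmentForSplitInstances([("B", "g2")], node_data, instances)
  #
  # reports "inst1" in the first returned list (it becomes split by the
  # move), while an instance that already spanned g1 and g2 and still does
  # afterwards would show up in the second list instead.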

    
11969

    
11970
class _GroupQuery(_QueryBase):
11971
  FIELDS = query.GROUP_FIELDS
11972

    
11973
  def ExpandNames(self, lu):
11974
    lu.needed_locks = {}
11975

    
11976
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11977
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11978

    
11979
    if not self.names:
11980
      self.wanted = [name_to_uuid[name]
11981
                     for name in utils.NiceSort(name_to_uuid.keys())]
11982
    else:
11983
      # Accept names to be either names or UUIDs.
11984
      missing = []
11985
      self.wanted = []
11986
      all_uuid = frozenset(self._all_groups.keys())
11987

    
11988
      for name in self.names:
11989
        if name in all_uuid:
11990
          self.wanted.append(name)
11991
        elif name in name_to_uuid:
11992
          self.wanted.append(name_to_uuid[name])
11993
        else:
11994
          missing.append(name)
11995

    
11996
      if missing:
11997
        raise errors.OpPrereqError("Some groups do not exist: %s" %
11998
                                   utils.CommaJoin(missing),
11999
                                   errors.ECODE_NOENT)
12000

    
12001
  def DeclareLocks(self, lu, level):
12002
    pass
12003

    
12004
  def _GetQueryData(self, lu):
12005
    """Computes the list of node groups and their attributes.
12006

12007
    """
12008
    do_nodes = query.GQ_NODE in self.requested_data
12009
    do_instances = query.GQ_INST in self.requested_data
12010

    
12011
    group_to_nodes = None
12012
    group_to_instances = None
12013

    
12014
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
12015
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
12016
    # latter GetAllInstancesInfo() is not enough, for we have to go through
12017
    # instance->node. Hence, we will need to process nodes even if we only need
12018
    # instance information.
12019
    if do_nodes or do_instances:
12020
      all_nodes = lu.cfg.GetAllNodesInfo()
12021
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
12022
      node_to_group = {}
12023

    
12024
      for node in all_nodes.values():
12025
        if node.group in group_to_nodes:
12026
          group_to_nodes[node.group].append(node.name)
12027
          node_to_group[node.name] = node.group
12028

    
12029
      if do_instances:
12030
        all_instances = lu.cfg.GetAllInstancesInfo()
12031
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
12032

    
12033
        for instance in all_instances.values():
12034
          node = instance.primary_node
12035
          if node in node_to_group:
12036
            group_to_instances[node_to_group[node]].append(instance.name)
12037

    
12038
        if not do_nodes:
12039
          # Do not pass on node information if it was not requested.
12040
          group_to_nodes = None
12041

    
12042
    return query.GroupQueryData([self._all_groups[uuid]
12043
                                 for uuid in self.wanted],
12044
                                group_to_nodes, group_to_instances)
12045
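  # Shape of the data handed to query.GroupQueryData (hypothetical UUIDs and
  # names); both maps are keyed by group UUID and stay None when the
  # corresponding fields were not requested:
  #
  #   group_to_nodes     = {"uuid-1": ["node1", "node2"], "uuid-2": []}
  #   group_to_instances = {"uuid-1": ["inst1"], "uuid-2": []}
  #
  # Instances are attributed to the group of their primary node only.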

    
12046

    
12047
class LUGroupQuery(NoHooksLU):
12048
  """Logical unit for querying node groups.
12049

12050
  """
12051
  REQ_BGL = False
12052

    
12053
  def CheckArguments(self):
12054
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
12055
                          self.op.output_fields, False)
12056

    
12057
  def ExpandNames(self):
12058
    self.gq.ExpandNames(self)
12059

    
12060
  def Exec(self, feedback_fn):
12061
    return self.gq.OldStyleQuery(self)
12062

    
12063

    
12064
class LUGroupSetParams(LogicalUnit):
12065
  """Modifies the parameters of a node group.
12066

12067
  """
12068
  HPATH = "group-modify"
12069
  HTYPE = constants.HTYPE_GROUP
12070
  REQ_BGL = False
12071

    
12072
  def CheckArguments(self):
12073
    all_changes = [
12074
      self.op.ndparams,
12075
      self.op.alloc_policy,
12076
      ]
12077

    
12078
    if all_changes.count(None) == len(all_changes):
12079
      raise errors.OpPrereqError("Please pass at least one modification",
12080
                                 errors.ECODE_INVAL)
12081

    
12082
  def ExpandNames(self):
12083
    # This raises errors.OpPrereqError on its own:
12084
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12085

    
12086
    self.needed_locks = {
12087
      locking.LEVEL_NODEGROUP: [self.group_uuid],
12088
      }
12089

    
12090
  def CheckPrereq(self):
12091
    """Check prerequisites.
12092

12093
    """
12094
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
12095

    
12096
    if self.group is None:
12097
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12098
                               (self.op.group_name, self.group_uuid))
12099

    
12100
    if self.op.ndparams:
12101
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
12102
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
12103
      self.new_ndparams = new_ndparams
12104

    
12105
  def BuildHooksEnv(self):
12106
    """Build hooks env.
12107

12108
    """
12109
    return {
12110
      "GROUP_NAME": self.op.group_name,
12111
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
12112
      }
12113

    
12114
  def BuildHooksNodes(self):
12115
    """Build hooks nodes.
12116

12117
    """
12118
    mn = self.cfg.GetMasterNode()
12119
    return ([mn], [mn])
12120

    
12121
  def Exec(self, feedback_fn):
12122
    """Modifies the node group.
12123

12124
    """
12125
    result = []
12126

    
12127
    if self.op.ndparams:
12128
      self.group.ndparams = self.new_ndparams
12129
      result.append(("ndparams", str(self.group.ndparams)))
12130

    
12131
    if self.op.alloc_policy:
12132
      self.group.alloc_policy = self.op.alloc_policy
      result.append(("alloc_policy", self.op.alloc_policy))
12133

    
12134
    self.cfg.Update(self.group, feedback_fn)
12135
    return result


class LUGroupRemove(LogicalUnit):
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name exists as a node group, that it is
    empty (i.e., contains no nodes), and that it is not the last group of the
    cluster.

    """
    # Verify that the group is empty.
    group_nodes = [node.name
                   for node in self.cfg.GetAllNodesInfo().values()
                   if node.group == self.group_uuid]

    if group_nodes:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    if len(self.cfg.GetNodeGroupList()) == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Remove the node group.

    """
    try:
      self.cfg.RemoveNodeGroup(self.group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, self.group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid


class LUGroupRename(LogicalUnit):
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    """
    try:
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      pass
    else:
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                                 " node group (UUID: %s)" %
                                 (self.op.new_name, new_name_uuid),
                                 errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetAllNodesInfo()
    all_nodes.pop(mn, None)

    run_nodes = [mn]
    run_nodes.extend(node.name for node in all_nodes.values()
                     if node.group == self.group_uuid)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    """
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = self.op.new_name
    self.cfg.Update(group, feedback_fn)

    return self.op.new_name


class LUGroupEvacuate(LogicalUnit):
  HPATH = "group-evacuate"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    if self.op.target_groups:
      self.req_target_uuids = map(self.cfg.LookupNodeGroup,
                                  self.op.target_groups)
    else:
      self.req_target_uuids = []

    if self.group_uuid in self.req_target_uuids:
      raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
                                 " as a target group (targets are %s)" %
                                 (self.group_uuid,
                                  utils.CommaJoin(self.req_target_uuids)),
                                 errors.ECODE_INVAL)

    self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)

    self.share_locks = _ShareAll()
    self.needed_locks = {
      locking.LEVEL_INSTANCE: [],
      locking.LEVEL_NODEGROUP: [],
      locking.LEVEL_NODE: [],
      }

  def DeclareLocks(self, level):
    if level == locking.LEVEL_INSTANCE:
      assert not self.needed_locks[locking.LEVEL_INSTANCE]

      # Lock instances optimistically, needs verification once node and group
      # locks have been acquired
      self.needed_locks[locking.LEVEL_INSTANCE] = \
        self.cfg.GetNodeGroupInstances(self.group_uuid)

    elif level == locking.LEVEL_NODEGROUP:
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      if self.req_target_uuids:
        lock_groups = set([self.group_uuid] + self.req_target_uuids)

        # Lock all groups used by instances optimistically; this requires going
        # via the node before it's locked, requiring verification later on
        lock_groups.update(group_uuid
                           for instance_name in
                             self.owned_locks(locking.LEVEL_INSTANCE)
                           for group_uuid in
                             self.cfg.GetInstanceNodeGroups(instance_name))
      else:
        # No target groups, need to lock all of them
        lock_groups = locking.ALL_SET

      self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups

    elif level == locking.LEVEL_NODE:
      # This will only lock the nodes in the group to be evacuated which
      # contain actual instances
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
      self._LockInstancesNodes()

      # Lock all nodes in group to be evacuated and target groups
      owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
      assert self.group_uuid in owned_groups
      member_nodes = [node_name
                      for group in owned_groups
                      for node_name in self.cfg.GetNodeGroup(group).members]
      self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)

  def CheckPrereq(self):
    owned_instances = frozenset(self.owned_locks(locking.LEVEL_INSTANCE))
    owned_groups = frozenset(self.owned_locks(locking.LEVEL_NODEGROUP))
    owned_nodes = frozenset(self.owned_locks(locking.LEVEL_NODE))

    assert owned_groups.issuperset(self.req_target_uuids)
    assert self.group_uuid in owned_groups

    # Check if locked instances are still correct
    _CheckNodeGroupInstances(self.cfg, self.group_uuid, owned_instances)

    # Get instance information
    self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))

    # Check if node groups for locked instances are still correct
    for instance_name in owned_instances:
      inst = self.instances[instance_name]
      assert owned_nodes.issuperset(inst.all_nodes), \
        "Instance %s's nodes changed while we kept the lock" % instance_name

      inst_groups = _CheckInstanceNodeGroups(self.cfg, instance_name,
                                             owned_groups)

      assert self.group_uuid in inst_groups, \
        "Instance %s has no node in group %s" % (instance_name, self.group_uuid)

    if self.req_target_uuids:
      # User requested specific target groups
      self.target_uuids = self.req_target_uuids
    else:
      # All groups except the one to be evacuated are potential targets
      self.target_uuids = [group_uuid for group_uuid in owned_groups
                           if group_uuid != self.group_uuid]

      if not self.target_uuids:
        raise errors.OpPrereqError("There are no possible target groups",
                                   errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "TARGET_GROUPS": " ".join(self.target_uuids),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    mn = self.cfg.GetMasterNode()

    assert self.group_uuid in self.owned_locks(locking.LEVEL_NODEGROUP)

    run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    instances = list(self.owned_locks(locking.LEVEL_INSTANCE))

    assert self.group_uuid not in self.target_uuids

    ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
                     instances=instances, target_groups=self.target_uuids)

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute group evacuation using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)

    jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)

    self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
                 len(jobs), self.op.group_name)

    return ResultWithJobs(jobs)


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = _ShareAll()

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tags matching the pattern.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    tgts.extend(("/nodegroup/%s" % n.name, n)
                for n in cfg.GetAllNodeGroupsInfo().values())
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
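  # Editor's illustrative note (not part of the original code): the returned
  # value is a list of (path, tag) tuples; a pattern such as r"^backup" might
  # yield something like [("/instances/web1.example.com", "backup-daily"),
  # ("/nodes/node2.example.com", "backup")]. The names here are made up.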


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has three sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.memory = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.instances = None
    self.evac_mode = None
    self.target_groups = []
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None

    try:
      (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
    except KeyError:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    keyset = [n for (n, _) in keydata]

    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(compat.partial(fn, self), keydata)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    else:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = dict((guuid, {
      "name": gdata.name,
      "alloc_policy": gdata.alloc_policy,
      })
      for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())

    return ng
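  # Editor's illustrative sketch (not from the original source): the structure
  # returned above maps group UUIDs to small dicts, e.g.
  #   {"d0e89f2a-uuid": {"name": "default", "alloc_policy": "preferred"}}
  # The UUID and policy values shown here are hypothetical examples.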

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @returns: a dict of name: (node dict, node config)

    """
    # fill in static (config-based) values
    node_results = dict((ninfo.name, {
      "tags": list(ninfo.GetTags()),
      "primary_ip": ninfo.primary_ip,
      "secondary_ip": ninfo.secondary_ip,
      "offline": ninfo.offline,
      "drained": ninfo.drained,
      "master_candidate": ninfo.master_candidate,
      "group": ninfo.group,
      "master_capable": ninfo.master_capable,
      "vm_capable": ninfo.vm_capable,
      })
      for ninfo in node_cfg.values())

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ["memory_total", "memory_free", "memory_dom0",
                     "vg_size", "vg_free", "cpu_total"]:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info["memory_free"] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info["memory_total"],
          "reserved_memory": remote_info["memory_dom0"],
          "free_memory": remote_info["memory_free"],
          "total_disk": remote_info["vg_size"],
          "free_disk": remote_info["vg_free"],
          "total_cpus": remote_info["cpu_total"],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled_params[constants.NIC_MODE],
          "link": filled_params[constants.NIC_LINK],
          }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{constants.IDISK_SIZE: dsk.size,
                   constants.IDISK_MODE: dsk.mode}
                  for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1

    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.memory,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      "hypervisor": self.hypervisor,
      }

    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddNodeEvacuate(self):
    """Get data for node-evacuate requests.

    """
    return {
      "instances": self.instances,
      "evac_mode": self.evac_mode,
      }

  def _AddChangeGroup(self):
    """Get data for group change requests.

    """
    return {
      "instances": self.instances,
      "target_groups": self.target_groups,
      }

  def _BuildInputData(self, fn, keydata):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    for keyname, keytype in keydata:
      if keyname not in request:
        raise errors.ProgrammerError("Request parameter %s is missing" %
                                     keyname)
      val = request[keyname]
      if not keytype(val):
        raise errors.ProgrammerError("Request parameter %s doesn't pass"
                                     " validation, value %s, expected"
                                     " type %s" % (keyname, val, keytype))
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  _STRING_LIST = ht.TListOf(ht.TString)
  _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
     # pylint: disable-msg=E1101
     # Class '...' has no 'OP_ID' member
     "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
                          opcodes.OpInstanceMigrate.OP_ID,
                          opcodes.OpInstanceReplaceDisks.OP_ID])
     })))

  _NEVAC_MOVED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(3),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TNonEmptyString,
                                  ht.TListOf(ht.TNonEmptyString),
                                 ])))
  _NEVAC_FAILED = \
    ht.TListOf(ht.TAnd(ht.TIsLength(2),
                       ht.TItems([ht.TNonEmptyString,
                                  ht.TMaybeString,
                                 ])))
  _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
                          ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
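  # Editor's note (illustrative, not part of the original code): per the
  # validators above, a node-evacuation or change-group result is a 3-tuple
  #   (moved, failed, jobs)
  # where "moved" lists (instance, target group, [nodes...]) entries, "failed"
  # lists (instance, reason) pairs, and "jobs" is a list of lists of opcode
  # dicts restricted to the failover/migrate/replace-disks opcodes.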

  _MODE_DATA = {
    constants.IALLOCATOR_MODE_ALLOC:
      (_AddNewInstance,
       [
        ("name", ht.TString),
        ("memory", ht.TInt),
        ("disks", ht.TListOf(ht.TDict)),
        ("disk_template", ht.TString),
        ("os", ht.TString),
        ("tags", _STRING_LIST),
        ("nics", ht.TListOf(ht.TDict)),
        ("vcpus", ht.TInt),
        ("hypervisor", ht.TString),
        ], ht.TList),
    constants.IALLOCATOR_MODE_RELOC:
      (_AddRelocateInstance,
       [("name", ht.TString), ("relocate_from", _STRING_LIST)],
       ht.TList),
     constants.IALLOCATOR_MODE_NODE_EVAC:
      (_AddNodeEvacuate, [
        ("instances", _STRING_LIST),
        ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
        ], _NEVAC_RESULT),
     constants.IALLOCATOR_MODE_CHG_GROUP:
      (_AddChangeGroup, [
        ("instances", _STRING_LIST),
        ("target_groups", _STRING_LIST),
        ], _NEVAC_RESULT),
    }

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not self._result_check(self.result):
      raise errors.OpExecError("Iallocator returned invalid result,"
                               " expected %s, got %s" %
                               (self._result_check, self.result),
                               errors.ECODE_INVAL)

    if self.mode == constants.IALLOCATOR_MODE_RELOC:
      assert self.relocate_from is not None
      assert self.required_nodes == 1

      node2group = dict((name, ndata["group"])
                        for (name, ndata) in self.in_data["nodes"].items())

      fn = compat.partial(self._NodesToGroups, node2group,
                          self.in_data["nodegroups"])

      instance = self.cfg.GetInstanceInfo(self.name)
      request_groups = fn(self.relocate_from + [instance.primary_node])
      result_groups = fn(rdict["result"] + [instance.primary_node])

      if self.success and not set(result_groups).issubset(request_groups):
        raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
                                 " differ from original groups (%s)" %
                                 (utils.CommaJoin(result_groups),
                                  utils.CommaJoin(request_groups)))

    elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES

    self.out_data = rdict

  @staticmethod
  def _NodesToGroups(node2group, groups, nodes):
    """Returns a list of unique group names for a list of nodes.

    @type node2group: dict
    @param node2group: Map from node name to group UUID
    @type groups: dict
    @param groups: Group information
    @type nodes: list
    @param nodes: Node names

    """
    result = set()

    for node in nodes:
      try:
        group_uuid = node2group[node]
      except KeyError:
        # Ignore unknown node
        pass
      else:
        try:
          group = groups[group_uuid]
        except KeyError:
          # Can't find group, let's use UUID
          group_name = group_uuid
        else:
          group_name = group["name"]

        result.add(group_name)

    return sorted(result)
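  # Editor's illustrative sketch (hypothetical data, not from the original
  # source): given
  #   node2group = {"node1": "uuid-a", "node2": "uuid-b"}
  #   groups = {"uuid-a": {"name": "default"}}
  # _NodesToGroups(node2group, groups, ["node1", "node2", "ghost"]) would
  # return ["default", "uuid-b"]: known groups are reported by name, unknown
  # group UUIDs are kept as-is and unknown nodes are ignored.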


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["memory", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            constants.IDISK_SIZE not in row or
            not isinstance(row[constants.IDISK_SIZE], int) or
            constants.IDISK_MODE not in row or
            row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = \
          list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
    elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
                          constants.IALLOCATOR_MODE_NODE_EVAC):
      if not self.op.instances:
        raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
      self.op.instances = _GetWantedInstances(self, self.op.instances)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       memory=self.op.memory,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       target_groups=self.op.target_groups)
    elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       instances=self.op.instances,
                       evac_mode=self.op.evac_mode)
    else:
      raise errors.ProgrammerError("Uncaught mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
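# Editor's illustrative sketch (not part of the original code): a caller such
# as an opcode handler would typically do something like
#   impl = _GetQueryImplementation(constants.QR_GROUP)
#   qobj = impl(qlang.MakeSimpleFilter("name", names), fields, False)
# mirroring how LUGroupQuery.CheckArguments builds its _GroupQuery above; the
# "names" and "fields" variables here are hypothetical placeholders.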