root / lib / cmdlib.py @ abd66bf8
#
#

# Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201,C0302

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

# C0302: since we have way too many lines in this module

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL
import socket
import tempfile
import shutil
import itertools

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf
from ganeti import uidpool
from ganeti import compat
from ganeti import masterd
from ganeti import netutils
from ganeti import query
from ganeti import qlang
from ganeti import opcodes

import ganeti.masterd.instance # pylint: disable-msg=W0611


def _SupportsOob(cfg, node):
  """Tells if node supports OOB.

  @type cfg: L{config.ConfigWriter}
  @param cfg: The cluster configuration
  @type node: L{objects.Node}
  @param node: The node
  @return: The OOB script if supported or an empty string otherwise

  """
  return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]

# End types
78
class LogicalUnit(object):
79
  """Logical Unit base class.
80

81
  Subclasses must follow these rules:
82
    - implement ExpandNames
83
    - implement CheckPrereq (except when tasklets are used)
84
    - implement Exec (except when tasklets are used)
85
    - implement BuildHooksEnv
86
    - redefine HPATH and HTYPE
87
    - optionally redefine their run requirements:
88
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
89

90
  Note that all commands require root permissions.
91

92
  @ivar dry_run_result: the value (if any) that will be returned to the caller
93
      in dry-run mode (signalled by opcode dry_run parameter)
94

95
  """
96
  HPATH = None
97
  HTYPE = None
98
  REQ_BGL = True
99

    
100
  def __init__(self, processor, op, context, rpc):
101
    """Constructor for LogicalUnit.
102

103
    This needs to be overridden in derived classes in order to check op
104
    validity.
105

106
    """
107
    self.proc = processor
108
    self.op = op
109
    self.cfg = context.cfg
110
    self.context = context
111
    self.rpc = rpc
112
    # Dicts used to declare locking needs to mcpu
113
    self.needed_locks = None
114
    self.acquired_locks = {}
115
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
116
    self.add_locks = {}
117
    self.remove_locks = {}
118
    # Used to force good behavior when calling helper functions
119
    self.recalculate_locks = {}
120
    self.__ssh = None
121
    # logging
122
    self.Log = processor.Log # pylint: disable-msg=C0103
123
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
124
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
125
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
126
    # support for dry-run
127
    self.dry_run_result = None
128
    # support for generic debug attribute
129
    if (not hasattr(self.op, "debug_level") or
130
        not isinstance(self.op.debug_level, int)):
131
      self.op.debug_level = 0
132

    
133
    # Tasklets
134
    self.tasklets = None
135

    
136
    # Validate opcode parameters and set defaults
137
    self.op.Validate(True)
138

    
139
    self.CheckArguments()
140

    
141
  def __GetSSH(self):
142
    """Returns the SshRunner object
143

144
    """
145
    if not self.__ssh:
146
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
147
    return self.__ssh
148

    
149
  ssh = property(fget=__GetSSH)
150

    
151
  def CheckArguments(self):
152
    """Check syntactic validity for the opcode arguments.
153

154
    This method is for doing a simple syntactic check and ensuring
155
    validity of opcode parameters, without any cluster-related
156
    checks. While the same can be accomplished in ExpandNames and/or
157
    CheckPrereq, doing these separately is better because:
158

159
      - ExpandNames is left as purely a lock-related function
160
      - CheckPrereq is run after we have acquired locks (and possibly
161
        waited for them)
162

163
    The function is allowed to change the self.op attribute so that
164
    later methods no longer need to worry about missing parameters.
165

166
    """
167
    pass
168

    
169
  def ExpandNames(self):
170
    """Expand names for this LU.
171

172
    This method is called before starting to execute the opcode, and it should
173
    update all the parameters of the opcode to their canonical form (e.g. a
174
    short node name must be fully expanded after this method has successfully
175
    completed). This way locking, hooks, logging, etc. can work correctly.
176

177
    LUs which implement this method must also populate the self.needed_locks
178
    member, as a dict with lock levels as keys, and a list of needed lock names
179
    as values. Rules:
180

181
      - use an empty dict if you don't need any lock
182
      - if you don't need any lock at a particular level omit that level
183
      - don't put anything for the BGL level
184
      - if you want all locks at a level use locking.ALL_SET as a value
185

186
    If you need to share locks (rather than acquire them exclusively) at one
187
    level you can modify self.share_locks, setting a true value (usually 1) for
188
    that level. By default locks are not shared.
189

190
    This function can also define a list of tasklets, which then will be
191
    executed in order instead of the usual LU-level CheckPrereq and Exec
192
    functions, if those are not defined by the LU.
193

194
    Examples::
195

196
      # Acquire all nodes and one instance
197
      self.needed_locks = {
198
        locking.LEVEL_NODE: locking.ALL_SET,
199
        locking.LEVEL_INSTANCE: ['instance1.example.com'],
200
      }
201
      # Acquire just two nodes
202
      self.needed_locks = {
203
        locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
204
      }
205
      # Acquire no locks
206
      self.needed_locks = {} # No, you can't leave it to the default value None
207

208
    """
209
    # The implementation of this method is mandatory only if the new LU is
210
    # concurrent, so that old LUs don't need to be changed all at the same
211
    # time.
212
    if self.REQ_BGL:
213
      self.needed_locks = {} # Exclusive LUs don't need locks.
214
    else:
215
      raise NotImplementedError
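
  # Illustrative sketch (added to this listing, not part of the original
  # file): a minimal ExpandNames for a concurrent, read-only LU that takes
  # all node locks in shared mode, as described in the docstring above. The
  # surrounding LU class is assumed.
  #
  #   def ExpandNames(self):
  #     self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
  #     self.share_locks[locking.LEVEL_NODE] = 1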
216

    
217
  def DeclareLocks(self, level):
218
    """Declare LU locking needs for a level
219

220
    While most LUs can just declare their locking needs at ExpandNames time,
221
    sometimes there's the need to calculate some locks after having acquired
222
    the ones before. This function is called just before acquiring locks at a
223
    particular level, but after acquiring the ones at lower levels, and permits
224
    such calculations. It can be used to modify self.needed_locks, and by
225
    default it does nothing.
226

227
    This function is only called if you have something already set in
228
    self.needed_locks for the level.
229

230
    @param level: Locking level which is going to be locked
231
    @type level: member of ganeti.locking.LEVELS
232

233
    """
234

    
235
  def CheckPrereq(self):
236
    """Check prerequisites for this LU.
237

238
    This method should check that the prerequisites for the execution
239
    of this LU are fulfilled. It can do internode communication, but
240
    it should be idempotent - no cluster or system changes are
241
    allowed.
242

243
    The method should raise errors.OpPrereqError in case something is
244
    not fulfilled. Its return value is ignored.
245

246
    This method should also update all the parameters of the opcode to
247
    their canonical form if it hasn't been done by ExpandNames before.
248

249
    """
250
    if self.tasklets is not None:
251
      for (idx, tl) in enumerate(self.tasklets):
252
        logging.debug("Checking prerequisites for tasklet %s/%s",
253
                      idx + 1, len(self.tasklets))
254
        tl.CheckPrereq()
255
    else:
256
      pass
257

    
258
  def Exec(self, feedback_fn):
259
    """Execute the LU.
260

261
    This method should implement the actual work. It should raise
262
    errors.OpExecError for failures that are somewhat dealt with in
263
    code, or expected.
264

265
    """
266
    if self.tasklets is not None:
267
      for (idx, tl) in enumerate(self.tasklets):
268
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
269
        tl.Exec(feedback_fn)
270
    else:
271
      raise NotImplementedError
272

    
273
  def BuildHooksEnv(self):
274
    """Build hooks environment for this LU.
275

276
    This method should return a three-element tuple consisting of: a dict
277
    containing the environment that will be used for running the
278
    specific hook for this LU, a list of node names on which the hook
279
    should run before the execution, and a list of node names on which
280
    the hook should run after the execution.
281

282
    The keys of the dict must not be prefixed with 'GANETI_' as this will
283
    be handled in the hooks runner. Also note additional keys will be
284
    added by the hooks runner. If the LU doesn't define any
285
    environment, an empty dict (and not None) should be returned.
286

287
    If there are no nodes, an empty list (and not None) should be returned.
288

289
    Note that if the HPATH for a LU class is None, this function will
290
    not be called.
291

292
    """
293
    raise NotImplementedError
294

    
295
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
296
    """Notify the LU about the results of its hooks.
297

298
    This method is called every time a hooks phase is executed, and notifies
299
    the Logical Unit about the hooks' result. The LU can then use it to alter
300
    its result based on the hooks.  By default the method does nothing and the
301
    previous result is passed back unchanged but any LU can define it if it
302
    wants to use the local cluster hook-scripts somehow.
303

304
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
305
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
306
    @param hook_results: the results of the multi-node hooks rpc call
307
    @param feedback_fn: function used to send feedback back to the caller
308
    @param lu_result: the previous Exec result this LU had, or None
309
        in the PRE phase
310
    @return: the new Exec result, based on the previous result
311
        and hook results
312

313
    """
314
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
316
    # pylint: disable-msg=W0613,R0201
317
    return lu_result
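
  # Illustrative sketch (added to this listing, not part of the original
  # file): an LU could override HooksCallBack to surface failed post-phase
  # hooks to the caller; the fail_msg attribute on the per-node result is an
  # assumption about the RPC result objects.
  #
  #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  #     if phase == constants.HOOKS_PHASE_POST:
  #       for node_name, node_result in hook_results.items():
  #         if node_result.fail_msg:
  #           feedback_fn("Post hook on %s failed: %s" %
  #                       (node_name, node_result.fail_msg))
  #     return lu_result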
318

    
319
  def _ExpandAndLockInstance(self):
320
    """Helper function to expand and lock an instance.
321

322
    Many LUs that work on an instance take its name in self.op.instance_name
323
    and need to expand it and then declare the expanded name for locking. This
324
    function does it, and then updates self.op.instance_name to the expanded
325
    name. It also initializes needed_locks as a dict, if this hasn't been done
326
    before.
327

328
    """
329
    if self.needed_locks is None:
330
      self.needed_locks = {}
331
    else:
332
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
333
        "_ExpandAndLockInstance called with instance-level locks set"
334
    self.op.instance_name = _ExpandInstanceName(self.cfg,
335
                                                self.op.instance_name)
336
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
337

    
338
  def _LockInstancesNodes(self, primary_only=False):
339
    """Helper function to declare instances' nodes for locking.
340

341
    This function should be called after locking one or more instances to lock
342
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
343
    with all primary or secondary nodes for instances already locked and
344
    present in self.needed_locks[locking.LEVEL_INSTANCE].
345

346
    It should be called from DeclareLocks, and for safety only works if
347
    self.recalculate_locks[locking.LEVEL_NODE] is set.
348

349
    In the future it may grow parameters to just lock some instance's nodes, or
350
    to just lock primaries or secondary nodes, if needed.
351

352
    It should be called in DeclareLocks in a way similar to::
353

354
      if level == locking.LEVEL_NODE:
355
        self._LockInstancesNodes()
356

357
    @type primary_only: boolean
358
    @param primary_only: only lock primary nodes of locked instances
359

360
    """
361
    assert locking.LEVEL_NODE in self.recalculate_locks, \
362
      "_LockInstancesNodes helper function called with no nodes to recalculate"
363

    
364
    # TODO: check if we've really been called with the instance locks held
365

    
366
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
367
    # future we might want to have different behaviors depending on the value
368
    # of self.recalculate_locks[locking.LEVEL_NODE]
369
    wanted_nodes = []
370
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
371
      instance = self.context.cfg.GetInstanceInfo(instance_name)
372
      wanted_nodes.append(instance.primary_node)
373
      if not primary_only:
374
        wanted_nodes.extend(instance.secondary_nodes)
375

    
376
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
377
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
378
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
379
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
380

    
381
    del self.recalculate_locks[locking.LEVEL_NODE]
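
  # Illustrative sketch (added to this listing, not part of the original
  # file): how an instance-level LU typically combines _ExpandAndLockInstance
  # and _LockInstancesNodes; the surrounding LU class is assumed.
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()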
382

    
383

    
384
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
385
  """Simple LU which runs no hooks.
386

387
  This LU is intended as a parent for other LogicalUnits which will
388
  run no hooks, in order to reduce duplicate code.
389

390
  """
391
  HPATH = None
392
  HTYPE = None
393

    
394
  def BuildHooksEnv(self):
395
    """Empty BuildHooksEnv for NoHooksLu.
396

397
    This just raises an error.
398

399
    """
400
    assert False, "BuildHooksEnv called for NoHooksLUs"
401

    
402

    
403
class Tasklet:
404
  """Tasklet base class.
405

406
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
407
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
408
  tasklets know nothing about locks.
409

410
  Subclasses must follow these rules:
411
    - Implement CheckPrereq
412
    - Implement Exec
413

414
  """
415
  def __init__(self, lu):
416
    self.lu = lu
417

    
418
    # Shortcuts
419
    self.cfg = lu.cfg
420
    self.rpc = lu.rpc
421

    
422
  def CheckPrereq(self):
423
    """Check prerequisites for this tasklets.
424

425
    This method should check whether the prerequisites for the execution of
426
    this tasklet are fulfilled. It can do internode communication, but it
427
    should be idempotent - no cluster or system changes are allowed.
428

429
    The method should raise errors.OpPrereqError in case something is not
430
    fulfilled. Its return value is ignored.
431

432
    This method should also update all parameters to their canonical form if it
433
    hasn't been done before.
434

435
    """
436
    pass
437

    
438
  def Exec(self, feedback_fn):
439
    """Execute the tasklet.
440

441
    This method should implement the actual work. It should raise
442
    errors.OpExecError for failures that are somewhat dealt with in code, or
443
    expected.
444

445
    """
446
    raise NotImplementedError
447

    
448

    
449
class _QueryBase:
450
  """Base for query utility classes.
451

452
  """
453
  #: Attribute holding field definitions
454
  FIELDS = None
455

    
456
  def __init__(self, filter_, fields, use_locking):
457
    """Initializes this class.
458

459
    """
460
    self.use_locking = use_locking
461

    
462
    self.query = query.Query(self.FIELDS, fields, filter_=filter_,
463
                             namefield="name")
464
    self.requested_data = self.query.RequestedData()
465
    self.names = self.query.RequestedNames()
466

    
467
    # Sort only if no names were requested
468
    self.sort_by_name = not self.names
469

    
470
    self.do_locking = None
471
    self.wanted = None
472

    
473
  def _GetNames(self, lu, all_names, lock_level):
474
    """Helper function to determine names asked for in the query.
475

476
    """
477
    if self.do_locking:
478
      names = lu.acquired_locks[lock_level]
479
    else:
480
      names = all_names
481

    
482
    if self.wanted == locking.ALL_SET:
483
      assert not self.names
484
      # caller didn't specify names, so ordering is not important
485
      return utils.NiceSort(names)
486

    
487
    # caller specified names and we must keep the same order
488
    assert self.names
489
    assert not self.do_locking or lu.acquired_locks[lock_level]
490

    
491
    missing = set(self.wanted).difference(names)
492
    if missing:
493
      raise errors.OpExecError("Some items were removed before retrieving"
494
                               " their data: %s" % missing)
495

    
496
    # Return expanded names
497
    return self.wanted
498

    
499
  @classmethod
500
  def FieldsQuery(cls, fields):
501
    """Returns list of available fields.
502

503
    @return: List of L{objects.QueryFieldDefinition}
504

505
    """
506
    return query.QueryFields(cls.FIELDS, fields)
507

    
508
  def ExpandNames(self, lu):
509
    """Expand names for this query.
510

511
    See L{LogicalUnit.ExpandNames}.
512

513
    """
514
    raise NotImplementedError()
515

    
516
  def DeclareLocks(self, lu, level):
517
    """Declare locks for this query.
518

519
    See L{LogicalUnit.DeclareLocks}.
520

521
    """
522
    raise NotImplementedError()
523

    
524
  def _GetQueryData(self, lu):
525
    """Collects all data for this query.
526

527
    @return: Query data object
528

529
    """
530
    raise NotImplementedError()
531

    
532
  def NewStyleQuery(self, lu):
533
    """Collect data and execute query.
534

535
    """
536
    return query.GetQueryResponse(self.query, self._GetQueryData(lu),
537
                                  sort_by_name=self.sort_by_name)
538

    
539
  def OldStyleQuery(self, lu):
540
    """Collect data and execute query.
541

542
    """
543
    return self.query.OldStyleQuery(self._GetQueryData(lu),
544
                                    sort_by_name=self.sort_by_name)
545

    
546

    
547
def _GetWantedNodes(lu, nodes):
548
  """Returns list of checked and expanded node names.
549

550
  @type lu: L{LogicalUnit}
551
  @param lu: the logical unit on whose behalf we execute
552
  @type nodes: list
553
  @param nodes: list of node names or None for all nodes
554
  @rtype: list
555
  @return: the list of nodes, sorted
556
  @raise errors.ProgrammerError: if the nodes parameter is wrong type
557

558
  """
559
  if nodes:
560
    return [_ExpandNodeName(lu.cfg, name) for name in nodes]
561

    
562
  return utils.NiceSort(lu.cfg.GetNodeList())
563

    
564

    
565
def _GetWantedInstances(lu, instances):
566
  """Returns list of checked and expanded instance names.
567

568
  @type lu: L{LogicalUnit}
569
  @param lu: the logical unit on whose behalf we execute
570
  @type instances: list
571
  @param instances: list of instance names or None for all instances
572
  @rtype: list
573
  @return: the list of instances, sorted
574
  @raise errors.OpPrereqError: if the instances parameter is wrong type
575
  @raise errors.OpPrereqError: if any of the passed instances is not found
576

577
  """
578
  if instances:
579
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
580
  else:
581
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
582
  return wanted
583

    
584

    
585
def _GetUpdatedParams(old_params, update_dict,
586
                      use_default=True, use_none=False):
587
  """Return the new version of a parameter dictionary.
588

589
  @type old_params: dict
590
  @param old_params: old parameters
591
  @type update_dict: dict
592
  @param update_dict: dict containing new parameter values, or
593
      constants.VALUE_DEFAULT to reset the parameter to its default
594
      value
595
  @param use_default: boolean
596
  @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
597
      values as 'to be deleted' values
598
  @type use_none: boolean
599
  @param use_none: whether to recognise C{None} values as 'to be
600
      deleted' values
601
  @rtype: dict
602
  @return: the new parameter dictionary
603

604
  """
605
  params_copy = copy.deepcopy(old_params)
606
  for key, val in update_dict.iteritems():
607
    if ((use_default and val == constants.VALUE_DEFAULT) or
608
        (use_none and val is None)):
609
      try:
610
        del params_copy[key]
611
      except KeyError:
612
        pass
613
    else:
614
      params_copy[key] = val
615
  return params_copy
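
# Illustrative sketch (added to this listing, not part of the original file):
# a standalone demonstration of the merge semantics implemented by
# _GetUpdatedParams; the parameter names and values are made up.
def _ExampleGetUpdatedParams():
  """Shows how L{_GetUpdatedParams} merges an update into existing params.

  """
  old_params = {"kernel_path": "/boot/vmlinuz", "root_path": "/dev/vda1"}
  # VALUE_DEFAULT marks a key for deletion (reset to default); other keys
  # are added or overwritten
  update = {"kernel_path": constants.VALUE_DEFAULT, "serial_console": True}
  return _GetUpdatedParams(old_params, update)
  # => {"root_path": "/dev/vda1", "serial_console": True}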
616

    
617

    
618
def _CheckOutputFields(static, dynamic, selected):
619
  """Checks whether all selected fields are valid.
620

621
  @type static: L{utils.FieldSet}
622
  @param static: static fields set
623
  @type dynamic: L{utils.FieldSet}
624
  @param dynamic: dynamic fields set
625

626
  """
627
  f = utils.FieldSet()
628
  f.Extend(static)
629
  f.Extend(dynamic)
630

    
631
  delta = f.NonMatching(selected)
632
  if delta:
633
    raise errors.OpPrereqError("Unknown output fields selected: %s"
634
                               % ",".join(delta), errors.ECODE_INVAL)
635

    
636

    
637
def _CheckGlobalHvParams(params):
638
  """Validates that given hypervisor params are not global ones.
639

640
  This will ensure that instances don't get customised versions of
641
  global params.
642

643
  """
644
  used_globals = constants.HVC_GLOBALS.intersection(params)
645
  if used_globals:
646
    msg = ("The following hypervisor parameters are global and cannot"
647
           " be customized at instance level, please modify them at"
648
           " cluster level: %s" % utils.CommaJoin(used_globals))
649
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
650

    
651

    
652
def _CheckNodeOnline(lu, node, msg=None):
653
  """Ensure that a given node is online.
654

655
  @param lu: the LU on behalf of which we make the check
656
  @param node: the node to check
657
  @param msg: if passed, should be a message to replace the default one
658
  @raise errors.OpPrereqError: if the node is offline
659

660
  """
661
  if msg is None:
662
    msg = "Can't use offline node"
663
  if lu.cfg.GetNodeInfo(node).offline:
664
    raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
665

    
666

    
667
def _CheckNodeNotDrained(lu, node):
668
  """Ensure that a given node is not drained.
669

670
  @param lu: the LU on behalf of which we make the check
671
  @param node: the node to check
672
  @raise errors.OpPrereqError: if the node is drained
673

674
  """
675
  if lu.cfg.GetNodeInfo(node).drained:
676
    raise errors.OpPrereqError("Can't use drained node %s" % node,
677
                               errors.ECODE_STATE)
678

    
679

    
680
def _CheckNodeVmCapable(lu, node):
681
  """Ensure that a given node is vm capable.
682

683
  @param lu: the LU on behalf of which we make the check
684
  @param node: the node to check
685
  @raise errors.OpPrereqError: if the node is not vm capable
686

687
  """
688
  if not lu.cfg.GetNodeInfo(node).vm_capable:
689
    raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
690
                               errors.ECODE_STATE)
691

    
692

    
693
def _CheckNodeHasOS(lu, node, os_name, force_variant):
694
  """Ensure that a node supports a given OS.
695

696
  @param lu: the LU on behalf of which we make the check
697
  @param node: the node to check
698
  @param os_name: the OS to query about
699
  @param force_variant: whether to ignore variant errors
700
  @raise errors.OpPrereqError: if the node is not supporting the OS
701

702
  """
703
  result = lu.rpc.call_os_get(node, os_name)
704
  result.Raise("OS '%s' not in supported OS list for node %s" %
705
               (os_name, node),
706
               prereq=True, ecode=errors.ECODE_INVAL)
707
  if not force_variant:
708
    _CheckOSVariant(result.payload, os_name)
709

    
710

    
711
def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
712
  """Ensure that a node has the given secondary ip.
713

714
  @type lu: L{LogicalUnit}
715
  @param lu: the LU on behalf of which we make the check
716
  @type node: string
717
  @param node: the node to check
718
  @type secondary_ip: string
719
  @param secondary_ip: the ip to check
720
  @type prereq: boolean
721
  @param prereq: whether to throw a prerequisite or an execute error
722
  @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
723
  @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
724

725
  """
726
  result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
727
  result.Raise("Failure checking secondary ip on node %s" % node,
728
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
729
  if not result.payload:
730
    msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
731
           " please fix and re-run this command" % secondary_ip)
732
    if prereq:
733
      raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
734
    else:
735
      raise errors.OpExecError(msg)
736

    
737

    
738
def _GetClusterDomainSecret():
739
  """Reads the cluster domain secret.
740

741
  """
742
  return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
743
                               strict=True)
744

    
745

    
746
def _CheckInstanceDown(lu, instance, reason):
747
  """Ensure that an instance is not running."""
748
  if instance.admin_up:
749
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
750
                               (instance.name, reason), errors.ECODE_STATE)
751

    
752
  pnode = instance.primary_node
753
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
754
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
755
              prereq=True, ecode=errors.ECODE_ENVIRON)
756

    
757
  if instance.name in ins_l.payload:
758
    raise errors.OpPrereqError("Instance %s is running, %s" %
759
                               (instance.name, reason), errors.ECODE_STATE)
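
# Illustrative sketch (added to this listing, not part of the original file):
# how an LU would typically use the check above from CheckPrereq; the reason
# string is an example.
#
#   _CheckInstanceDown(self, instance, "cannot change disk template")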
760

    
761

    
762
def _ExpandItemName(fn, name, kind):
763
  """Expand an item name.
764

765
  @param fn: the function to use for expansion
766
  @param name: requested item name
767
  @param kind: text description ('Node' or 'Instance')
768
  @return: the resolved (full) name
769
  @raise errors.OpPrereqError: if the item is not found
770

771
  """
772
  full_name = fn(name)
773
  if full_name is None:
774
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
775
                               errors.ECODE_NOENT)
776
  return full_name
777

    
778

    
779
def _ExpandNodeName(cfg, name):
780
  """Wrapper over L{_ExpandItemName} for nodes."""
781
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
782

    
783

    
784
def _ExpandInstanceName(cfg, name):
785
  """Wrapper over L{_ExpandItemName} for instance."""
786
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
787

    
788

    
789
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
790
                          memory, vcpus, nics, disk_template, disks,
791
                          bep, hvp, hypervisor_name):
792
  """Builds instance related env variables for hooks
793

794
  This builds the hook environment from individual variables.
795

796
  @type name: string
797
  @param name: the name of the instance
798
  @type primary_node: string
799
  @param primary_node: the name of the instance's primary node
800
  @type secondary_nodes: list
801
  @param secondary_nodes: list of secondary nodes as strings
802
  @type os_type: string
803
  @param os_type: the name of the instance's OS
804
  @type status: boolean
805
  @param status: the should_run status of the instance
806
  @type memory: string
807
  @param memory: the memory size of the instance
808
  @type vcpus: string
809
  @param vcpus: the count of VCPUs the instance has
810
  @type nics: list
811
  @param nics: list of tuples (ip, mac, mode, link) representing
812
      the NICs the instance has
813
  @type disk_template: string
814
  @param disk_template: the disk template of the instance
815
  @type disks: list
816
  @param disks: the list of (size, mode) pairs
817
  @type bep: dict
818
  @param bep: the backend parameters for the instance
819
  @type hvp: dict
820
  @param hvp: the hypervisor parameters for the instance
821
  @type hypervisor_name: string
822
  @param hypervisor_name: the hypervisor for the instance
823
  @rtype: dict
824
  @return: the hook environment for this instance
825

826
  """
827
  if status:
828
    str_status = "up"
829
  else:
830
    str_status = "down"
831
  env = {
832
    "OP_TARGET": name,
833
    "INSTANCE_NAME": name,
834
    "INSTANCE_PRIMARY": primary_node,
835
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
836
    "INSTANCE_OS_TYPE": os_type,
837
    "INSTANCE_STATUS": str_status,
838
    "INSTANCE_MEMORY": memory,
839
    "INSTANCE_VCPUS": vcpus,
840
    "INSTANCE_DISK_TEMPLATE": disk_template,
841
    "INSTANCE_HYPERVISOR": hypervisor_name,
842
  }
843

    
844
  if nics:
845
    nic_count = len(nics)
846
    for idx, (ip, mac, mode, link) in enumerate(nics):
847
      if ip is None:
848
        ip = ""
849
      env["INSTANCE_NIC%d_IP" % idx] = ip
850
      env["INSTANCE_NIC%d_MAC" % idx] = mac
851
      env["INSTANCE_NIC%d_MODE" % idx] = mode
852
      env["INSTANCE_NIC%d_LINK" % idx] = link
853
      if mode == constants.NIC_MODE_BRIDGED:
854
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
855
  else:
856
    nic_count = 0
857

    
858
  env["INSTANCE_NIC_COUNT"] = nic_count
859

    
860
  if disks:
861
    disk_count = len(disks)
862
    for idx, (size, mode) in enumerate(disks):
863
      env["INSTANCE_DISK%d_SIZE" % idx] = size
864
      env["INSTANCE_DISK%d_MODE" % idx] = mode
865
  else:
866
    disk_count = 0
867

    
868
  env["INSTANCE_DISK_COUNT"] = disk_count
869

    
870
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
871
    for key, value in source.items():
872
      env["INSTANCE_%s_%s" % (kind, key)] = value
873

    
874
  return env
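
# Illustrative sketch (added to this listing, not part of the original file):
# builds the hook environment for a made-up single-NIC, single-disk instance;
# every value below is an example, not taken from a real cluster.
def _ExampleBuildInstanceHookEnv():
  """Shows the kind of environment L{_BuildInstanceHookEnv} produces.

  """
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com", [],
                               "debootstrap", True, 128, 1,
                               [("198.51.100.10", "aa:00:00:11:22:33",
                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
                               "plain", [(1024, "rw")], {}, {}, "xen-pvm")
  # The result contains e.g. INSTANCE_STATUS="up", INSTANCE_NIC_COUNT=1 and
  # INSTANCE_DISK0_SIZE=1024; entries from the bep/hvp dicts would appear
  # with an INSTANCE_BE_/INSTANCE_HV_ prefix.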
875

    
876

    
877
def _NICListToTuple(lu, nics):
878
  """Build a list of nic information tuples.
879

880
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
881
  value in LUInstanceQueryData.
882

883
  @type lu:  L{LogicalUnit}
884
  @param lu: the logical unit on whose behalf we execute
885
  @type nics: list of L{objects.NIC}
886
  @param nics: list of nics to convert to hooks tuples
887

888
  """
889
  hooks_nics = []
890
  cluster = lu.cfg.GetClusterInfo()
891
  for nic in nics:
892
    ip = nic.ip
893
    mac = nic.mac
894
    filled_params = cluster.SimpleFillNIC(nic.nicparams)
895
    mode = filled_params[constants.NIC_MODE]
896
    link = filled_params[constants.NIC_LINK]
897
    hooks_nics.append((ip, mac, mode, link))
898
  return hooks_nics
899

    
900

    
901
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
902
  """Builds instance related env variables for hooks from an object.
903

904
  @type lu: L{LogicalUnit}
905
  @param lu: the logical unit on whose behalf we execute
906
  @type instance: L{objects.Instance}
907
  @param instance: the instance for which we should build the
908
      environment
909
  @type override: dict
910
  @param override: dictionary with key/values that will override
911
      our values
912
  @rtype: dict
913
  @return: the hook environment dictionary
914

915
  """
916
  cluster = lu.cfg.GetClusterInfo()
917
  bep = cluster.FillBE(instance)
918
  hvp = cluster.FillHV(instance)
919
  args = {
920
    'name': instance.name,
921
    'primary_node': instance.primary_node,
922
    'secondary_nodes': instance.secondary_nodes,
923
    'os_type': instance.os,
924
    'status': instance.admin_up,
925
    'memory': bep[constants.BE_MEMORY],
926
    'vcpus': bep[constants.BE_VCPUS],
927
    'nics': _NICListToTuple(lu, instance.nics),
928
    'disk_template': instance.disk_template,
929
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
930
    'bep': bep,
931
    'hvp': hvp,
932
    'hypervisor_name': instance.hypervisor,
933
  }
934
  if override:
935
    args.update(override)
936
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
937

    
938

    
939
def _AdjustCandidatePool(lu, exceptions):
940
  """Adjust the candidate pool after node operations.
941

942
  """
943
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
944
  if mod_list:
945
    lu.LogInfo("Promoted nodes to master candidate role: %s",
946
               utils.CommaJoin(node.name for node in mod_list))
947
    for name in mod_list:
948
      lu.context.ReaddNode(name)
949
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
950
  if mc_now > mc_max:
951
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
952
               (mc_now, mc_max))
953

    
954

    
955
def _DecideSelfPromotion(lu, exceptions=None):
956
  """Decide whether I should promote myself as a master candidate.
957

958
  """
959
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
960
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
961
  # the new node will increase mc_max with one, so:
962
  mc_should = min(mc_should + 1, cp_size)
963
  return mc_now < mc_should
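
# Illustrative sketch (added to this listing, not part of the original file):
# a worked example of the decision above. With candidate_pool_size = 10,
# mc_now = 3 current candidates and mc_should = 4, adding this node yields
# mc_should = min(4 + 1, 10) = 5, and since 3 < 5 the node promotes itself.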
964

    
965

    
966
def _CheckNicsBridgesExist(lu, target_nics, target_node):
967
  """Check that the brigdes needed by a list of nics exist.
968

969
  """
970
  cluster = lu.cfg.GetClusterInfo()
971
  paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
972
  brlist = [params[constants.NIC_LINK] for params in paramslist
973
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
974
  if brlist:
975
    result = lu.rpc.call_bridges_exist(target_node, brlist)
976
    result.Raise("Error checking bridges on destination node '%s'" %
977
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
978

    
979

    
980
def _CheckInstanceBridgesExist(lu, instance, node=None):
981
  """Check that the brigdes needed by an instance exist.
982

983
  """
984
  if node is None:
985
    node = instance.primary_node
986
  _CheckNicsBridgesExist(lu, instance.nics, node)
987

    
988

    
989
def _CheckOSVariant(os_obj, name):
990
  """Check whether an OS name conforms to the os variants specification.
991

992
  @type os_obj: L{objects.OS}
993
  @param os_obj: OS object to check
994
  @type name: string
995
  @param name: OS name passed by the user, to check for validity
996

997
  """
998
  if not os_obj.supported_variants:
999
    return
1000
  variant = objects.OS.GetVariant(name)
1001
  if not variant:
1002
    raise errors.OpPrereqError("OS name must include a variant",
1003
                               errors.ECODE_INVAL)
1004

    
1005
  if variant not in os_obj.supported_variants:
1006
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1007

    
1008

    
1009
def _GetNodeInstancesInner(cfg, fn):
1010
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1011

    
1012

    
1013
def _GetNodeInstances(cfg, node_name):
1014
  """Returns a list of all primary and secondary instances on a node.
1015

1016
  """
1017

    
1018
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1019

    
1020

    
1021
def _GetNodePrimaryInstances(cfg, node_name):
1022
  """Returns primary instances on a node.
1023

1024
  """
1025
  return _GetNodeInstancesInner(cfg,
1026
                                lambda inst: node_name == inst.primary_node)
1027

    
1028

    
1029
def _GetNodeSecondaryInstances(cfg, node_name):
1030
  """Returns secondary instances on a node.
1031

1032
  """
1033
  return _GetNodeInstancesInner(cfg,
1034
                                lambda inst: node_name in inst.secondary_nodes)
1035

    
1036

    
1037
def _GetStorageTypeArgs(cfg, storage_type):
1038
  """Returns the arguments for a storage type.
1039

1040
  """
1041
  # Special case for file storage
1042
  if storage_type == constants.ST_FILE:
1043
    # storage.FileStorage wants a list of storage directories
1044
    return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1045

    
1046
  return []
1047

    
1048

    
1049
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1050
  faulty = []
1051

    
1052
  for dev in instance.disks:
1053
    cfg.SetDiskID(dev, node_name)
1054

    
1055
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1056
  result.Raise("Failed to get disk status from node %s" % node_name,
1057
               prereq=prereq, ecode=errors.ECODE_ENVIRON)
1058

    
1059
  for idx, bdev_status in enumerate(result.payload):
1060
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1061
      faulty.append(idx)
1062

    
1063
  return faulty
1064

    
1065

    
1066
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1067
  """Check the sanity of iallocator and node arguments and use the
1068
  cluster-wide iallocator if appropriate.
1069

1070
  Check that at most one of (iallocator, node) is specified. If none is
1071
  specified, then the LU's opcode's iallocator slot is filled with the
1072
  cluster-wide default iallocator.
1073

1074
  @type iallocator_slot: string
1075
  @param iallocator_slot: the name of the opcode iallocator slot
1076
  @type node_slot: string
1077
  @param node_slot: the name of the opcode target node slot
1078

1079
  """
1080
  node = getattr(lu.op, node_slot, None)
1081
  iallocator = getattr(lu.op, iallocator_slot, None)
1082

    
1083
  if node is not None and iallocator is not None:
1084
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1085
                               errors.ECODE_INVAL)
1086
  elif node is None and iallocator is None:
1087
    default_iallocator = lu.cfg.GetDefaultIAllocator()
1088
    if default_iallocator:
1089
      setattr(lu.op, iallocator_slot, default_iallocator)
1090
    else:
1091
      raise errors.OpPrereqError("No iallocator or node given and no"
1092
                                 " cluster-wide default iallocator found."
1093
                                 " Please specify either an iallocator or a"
1094
                                 " node, or set a cluster-wide default"
1095
                                 " iallocator.")
1096

    
1097

    
1098
class LUClusterPostInit(LogicalUnit):
1099
  """Logical unit for running hooks after cluster initialization.
1100

1101
  """
1102
  HPATH = "cluster-init"
1103
  HTYPE = constants.HTYPE_CLUSTER
1104

    
1105
  def BuildHooksEnv(self):
1106
    """Build hooks env.
1107

1108
    """
1109
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1110
    mn = self.cfg.GetMasterNode()
1111
    return env, [], [mn]
1112

    
1113
  def Exec(self, feedback_fn):
1114
    """Nothing to do.
1115

1116
    """
1117
    return True
1118

    
1119

    
1120
class LUClusterDestroy(LogicalUnit):
1121
  """Logical unit for destroying the cluster.
1122

1123
  """
1124
  HPATH = "cluster-destroy"
1125
  HTYPE = constants.HTYPE_CLUSTER
1126

    
1127
  def BuildHooksEnv(self):
1128
    """Build hooks env.
1129

1130
    """
1131
    env = {"OP_TARGET": self.cfg.GetClusterName()}
1132
    return env, [], []
1133

    
1134
  def CheckPrereq(self):
1135
    """Check prerequisites.
1136

1137
    This checks whether the cluster is empty.
1138

1139
    Any errors are signaled by raising errors.OpPrereqError.
1140

1141
    """
1142
    master = self.cfg.GetMasterNode()
1143

    
1144
    nodelist = self.cfg.GetNodeList()
1145
    if len(nodelist) != 1 or nodelist[0] != master:
1146
      raise errors.OpPrereqError("There are still %d node(s) in"
1147
                                 " this cluster." % (len(nodelist) - 1),
1148
                                 errors.ECODE_INVAL)
1149
    instancelist = self.cfg.GetInstanceList()
1150
    if instancelist:
1151
      raise errors.OpPrereqError("There are still %d instance(s) in"
1152
                                 " this cluster." % len(instancelist),
1153
                                 errors.ECODE_INVAL)
1154

    
1155
  def Exec(self, feedback_fn):
1156
    """Destroys the cluster.
1157

1158
    """
1159
    master = self.cfg.GetMasterNode()
1160

    
1161
    # Run post hooks on master node before it's removed
1162
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1163
    try:
1164
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1165
    except:
1166
      # pylint: disable-msg=W0702
1167
      self.LogWarning("Errors occurred running hooks on %s" % master)
1168

    
1169
    result = self.rpc.call_node_stop_master(master, False)
1170
    result.Raise("Could not disable the master role")
1171

    
1172
    return master
1173

    
1174

    
1175
def _VerifyCertificate(filename):
1176
  """Verifies a certificate for LUClusterVerify.
1177

1178
  @type filename: string
1179
  @param filename: Path to PEM file
1180

1181
  """
1182
  try:
1183
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1184
                                           utils.ReadFile(filename))
1185
  except Exception, err: # pylint: disable-msg=W0703
1186
    return (LUClusterVerify.ETYPE_ERROR,
1187
            "Failed to load X509 certificate %s: %s" % (filename, err))
1188

    
1189
  (errcode, msg) = \
1190
    utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1191
                                constants.SSL_CERT_EXPIRATION_ERROR)
1192

    
1193
  if msg:
1194
    fnamemsg = "While verifying %s: %s" % (filename, msg)
1195
  else:
1196
    fnamemsg = None
1197

    
1198
  if errcode is None:
1199
    return (None, fnamemsg)
1200
  elif errcode == utils.CERT_WARNING:
1201
    return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1202
  elif errcode == utils.CERT_ERROR:
1203
    return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1204

    
1205
  raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1206

    
1207

    
1208
class LUClusterVerify(LogicalUnit):
1209
  """Verifies the cluster status.
1210

1211
  """
1212
  HPATH = "cluster-verify"
1213
  HTYPE = constants.HTYPE_CLUSTER
1214
  REQ_BGL = False
1215

    
1216
  TCLUSTER = "cluster"
1217
  TNODE = "node"
1218
  TINSTANCE = "instance"
1219

    
1220
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1221
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1222
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1223
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1224
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1225
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1226
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1227
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1228
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1229
  ENODEDRBD = (TNODE, "ENODEDRBD")
1230
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1231
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1232
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
1233
  ENODEHV = (TNODE, "ENODEHV")
1234
  ENODELVM = (TNODE, "ENODELVM")
1235
  ENODEN1 = (TNODE, "ENODEN1")
1236
  ENODENET = (TNODE, "ENODENET")
1237
  ENODEOS = (TNODE, "ENODEOS")
1238
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1239
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1240
  ENODERPC = (TNODE, "ENODERPC")
1241
  ENODESSH = (TNODE, "ENODESSH")
1242
  ENODEVERSION = (TNODE, "ENODEVERSION")
1243
  ENODESETUP = (TNODE, "ENODESETUP")
1244
  ENODETIME = (TNODE, "ENODETIME")
1245
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1246

    
1247
  ETYPE_FIELD = "code"
1248
  ETYPE_ERROR = "ERROR"
1249
  ETYPE_WARNING = "WARNING"
1250

    
1251
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1252

    
1253
  class NodeImage(object):
1254
    """A class representing the logical and physical status of a node.
1255

1256
    @type name: string
1257
    @ivar name: the node name to which this object refers
1258
    @ivar volumes: a structure as returned from
1259
        L{ganeti.backend.GetVolumeList} (runtime)
1260
    @ivar instances: a list of running instances (runtime)
1261
    @ivar pinst: list of configured primary instances (config)
1262
    @ivar sinst: list of configured secondary instances (config)
1263
    @ivar sbp: dictionary of {primary-node: list of instances} for all
1264
        instances for which this node is secondary (config)
1265
    @ivar mfree: free memory, as reported by hypervisor (runtime)
1266
    @ivar dfree: free disk, as reported by the node (runtime)
1267
    @ivar offline: the offline status (config)
1268
    @type rpc_fail: boolean
1269
    @ivar rpc_fail: whether the RPC verify call was successful (overall,
1270
        not whether the individual keys were correct) (runtime)
1271
    @type lvm_fail: boolean
1272
    @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1273
    @type hyp_fail: boolean
1274
    @ivar hyp_fail: whether the RPC call didn't return the instance list
1275
    @type ghost: boolean
1276
    @ivar ghost: whether this is a known node or not (config)
1277
    @type os_fail: boolean
1278
    @ivar os_fail: whether the RPC call didn't return valid OS data
1279
    @type oslist: list
1280
    @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1281
    @type vm_capable: boolean
1282
    @ivar vm_capable: whether the node can host instances
1283

1284
    """
1285
    def __init__(self, offline=False, name=None, vm_capable=True):
1286
      self.name = name
1287
      self.volumes = {}
1288
      self.instances = []
1289
      self.pinst = []
1290
      self.sinst = []
1291
      self.sbp = {}
1292
      self.mfree = 0
1293
      self.dfree = 0
1294
      self.offline = offline
1295
      self.vm_capable = vm_capable
1296
      self.rpc_fail = False
1297
      self.lvm_fail = False
1298
      self.hyp_fail = False
1299
      self.ghost = False
1300
      self.os_fail = False
1301
      self.oslist = {}
1302

    
1303
  def ExpandNames(self):
1304
    self.needed_locks = {
1305
      locking.LEVEL_NODE: locking.ALL_SET,
1306
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1307
    }
1308
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1309

    
1310
  def _Error(self, ecode, item, msg, *args, **kwargs):
1311
    """Format an error message.
1312

1313
    Based on the opcode's error_codes parameter, either format a
1314
    parseable error code, or a simpler error string.
1315

1316
    This must be called only from Exec and functions called from Exec.
1317

1318
    """
1319
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1320
    itype, etxt = ecode
1321
    # first complete the msg
1322
    if args:
1323
      msg = msg % args
1324
    # then format the whole message
1325
    if self.op.error_codes:
1326
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1327
    else:
1328
      if item:
1329
        item = " " + item
1330
      else:
1331
        item = ""
1332
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1333
    # and finally report it via the feedback_fn
1334
    self._feedback_fn("  - %s" % msg)
1335

    
1336
  def _ErrorIf(self, cond, *args, **kwargs):
1337
    """Log an error message if the passed condition is True.
1338

1339
    """
1340
    cond = bool(cond) or self.op.debug_simulate_errors
1341
    if cond:
1342
      self._Error(*args, **kwargs)
1343
    # do not mark the operation as failed for WARN cases only
1344
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1345
      self.bad = self.bad or cond
1346

    
1347
  def _VerifyNode(self, ninfo, nresult):
1348
    """Perform some basic validation on data returned from a node.
1349

1350
      - check the result data structure is well formed and has all the
1351
        mandatory fields
1352
      - check ganeti version
1353

1354
    @type ninfo: L{objects.Node}
1355
    @param ninfo: the node to check
1356
    @param nresult: the results from the node
1357
    @rtype: boolean
1358
    @return: whether overall this call was successful (and we can expect
1359
         reasonable values in the response)
1360

1361
    """
1362
    node = ninfo.name
1363
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1364

    
1365
    # main result, nresult should be a non-empty dict
1366
    test = not nresult or not isinstance(nresult, dict)
1367
    _ErrorIf(test, self.ENODERPC, node,
1368
                  "unable to verify node: no data returned")
1369
    if test:
1370
      return False
1371

    
1372
    # compares ganeti version
1373
    local_version = constants.PROTOCOL_VERSION
1374
    remote_version = nresult.get("version", None)
1375
    test = not (remote_version and
1376
                isinstance(remote_version, (list, tuple)) and
1377
                len(remote_version) == 2)
1378
    _ErrorIf(test, self.ENODERPC, node,
1379
             "connection to node returned invalid data")
1380
    if test:
1381
      return False
1382

    
1383
    test = local_version != remote_version[0]
1384
    _ErrorIf(test, self.ENODEVERSION, node,
1385
             "incompatible protocol versions: master %s,"
1386
             " node %s", local_version, remote_version[0])
1387
    if test:
1388
      return False
1389

    
1390
    # node seems compatible, we can actually try to look into its results
1391

    
1392
    # full package version
1393
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1394
                  self.ENODEVERSION, node,
1395
                  "software version mismatch: master %s, node %s",
1396
                  constants.RELEASE_VERSION, remote_version[1],
1397
                  code=self.ETYPE_WARNING)
1398

    
1399
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1400
    if ninfo.vm_capable and isinstance(hyp_result, dict):
1401
      for hv_name, hv_result in hyp_result.iteritems():
1402
        test = hv_result is not None
1403
        _ErrorIf(test, self.ENODEHV, node,
1404
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1405

    
1406
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1407
    if ninfo.vm_capable and isinstance(hvp_result, list):
1408
      for item, hv_name, hv_result in hvp_result:
1409
        _ErrorIf(True, self.ENODEHV, node,
1410
                 "hypervisor %s parameter verify failure (source %s): %s",
1411
                 hv_name, item, hv_result)
1412

    
1413
    test = nresult.get(constants.NV_NODESETUP,
1414
                           ["Missing NODESETUP results"])
1415
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1416
             "; ".join(test))
1417

    
1418
    return True
1419

    
1420
  def _VerifyNodeTime(self, ninfo, nresult,
1421
                      nvinfo_starttime, nvinfo_endtime):
1422
    """Check the node time.
1423

1424
    @type ninfo: L{objects.Node}
1425
    @param ninfo: the node to check
1426
    @param nresult: the remote results for the node
1427
    @param nvinfo_starttime: the start time of the RPC call
1428
    @param nvinfo_endtime: the end time of the RPC call
1429

1430
    """
1431
    node = ninfo.name
1432
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1433

    
1434
    ntime = nresult.get(constants.NV_TIME, None)
1435
    try:
1436
      ntime_merged = utils.MergeTime(ntime)
1437
    except (ValueError, TypeError):
1438
      _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1439
      return
1440

    
1441
    if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1442
      ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1443
    elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1444
      ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1445
    else:
1446
      ntime_diff = None
1447

    
1448
    _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1449
             "Node time diverges by at least %s from master node time",
1450
             ntime_diff)
1451

    
1452
  def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1453
    """Check the node time.
1454

1455
    @type ninfo: L{objects.Node}
1456
    @param ninfo: the node to check
1457
    @param nresult: the remote results for the node
1458
    @param vg_name: the configured VG name
1459

1460
    """
1461
    if vg_name is None:
1462
      return
1463

    
1464
    node = ninfo.name
1465
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1466

    
1467
    # checks vg existence and size > 20G
1468
    vglist = nresult.get(constants.NV_VGLIST, None)
1469
    test = not vglist
1470
    _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1471
    if not test:
1472
      vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1473
                                            constants.MIN_VG_SIZE)
1474
      _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1475

    
1476
    # check pv names
1477
    pvlist = nresult.get(constants.NV_PVLIST, None)
1478
    test = pvlist is None
1479
    _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1480
    if not test:
1481
      # check that ':' is not present in PV names, since it's a
1482
      # special character for lvcreate (denotes the range of PEs to
1483
      # use on the PV)
1484
      for _, pvname, owner_vg in pvlist:
1485
        test = ":" in pvname
1486
        _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1487
                 " '%s' of VG '%s'", pvname, owner_vg)
1488

    
1489
  def _VerifyNodeNetwork(self, ninfo, nresult):
1490
    """Check the node time.
1491

1492
    @type ninfo: L{objects.Node}
1493
    @param ninfo: the node to check
1494
    @param nresult: the remote results for the node
1495

1496
    """
1497
    node = ninfo.name
1498
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1499

    
1500
    test = constants.NV_NODELIST not in nresult
1501
    _ErrorIf(test, self.ENODESSH, node,
1502
             "node hasn't returned node ssh connectivity data")
1503
    if not test:
1504
      if nresult[constants.NV_NODELIST]:
1505
        for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1506
          _ErrorIf(True, self.ENODESSH, node,
1507
                   "ssh communication with node '%s': %s", a_node, a_msg)
1508

    
1509
    test = constants.NV_NODENETTEST not in nresult
1510
    _ErrorIf(test, self.ENODENET, node,
1511
             "node hasn't returned node tcp connectivity data")
1512
    if not test:
1513
      if nresult[constants.NV_NODENETTEST]:
1514
        nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1515
        for anode in nlist:
1516
          _ErrorIf(True, self.ENODENET, node,
1517
                   "tcp communication with node '%s': %s",
1518
                   anode, nresult[constants.NV_NODENETTEST][anode])
1519

    
1520
    test = constants.NV_MASTERIP not in nresult
1521
    _ErrorIf(test, self.ENODENET, node,
1522
             "node hasn't returned node master IP reachability data")
1523
    if not test:
1524
      if not nresult[constants.NV_MASTERIP]:
1525
        if node == self.master_node:
1526
          msg = "the master node cannot reach the master IP (not configured?)"
1527
        else:
1528
          msg = "cannot reach the master IP"
1529
        _ErrorIf(True, self.ENODENET, node, msg)
1530

    
1531
  def _VerifyInstance(self, instance, instanceconfig, node_image,
1532
                      diskstatus):
1533
    """Verify an instance.
1534

1535
    This function checks to see if the required block devices are
1536
    available on the instance's node.
1537

1538
    """
1539
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1540
    node_current = instanceconfig.primary_node
1541

    
1542
    node_vol_should = {}
1543
    instanceconfig.MapLVsByNode(node_vol_should)
1544

    
1545
    for node in node_vol_should:
1546
      n_img = node_image[node]
1547
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1548
        # ignore missing volumes on offline or broken nodes
1549
        continue
1550
      for volume in node_vol_should[node]:
1551
        test = volume not in n_img.volumes
1552
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1553
                 "volume %s missing on node %s", volume, node)
1554

    
1555
    if instanceconfig.admin_up:
1556
      pri_img = node_image[node_current]
1557
      test = instance not in pri_img.instances and not pri_img.offline
1558
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1559
               "instance not running on its primary node %s",
1560
               node_current)
1561

    
1562
    for node, n_img in node_image.items():
1563
      if node != node_current:
1564
        test = instance in n_img.instances
1565
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1566
                 "instance should not run on node %s", node)
1567

    
1568
    diskdata = [(nname, success, status, idx)
1569
                for (nname, disks) in diskstatus.items()
1570
                for idx, (success, status) in enumerate(disks)]
1571

    
1572
    for nname, success, bdev_status, idx in diskdata:
1573
      # the 'ghost node' construction in Exec() ensures that we have a
1574
      # node here
1575
      snode = node_image[nname]
1576
      bad_snode = snode.ghost or snode.offline
1577
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1578
               self.EINSTANCEFAULTYDISK, instance,
1579
               "couldn't retrieve status for disk/%s on %s: %s",
1580
               idx, nname, bdev_status)
1581
      _ErrorIf((instanceconfig.admin_up and success and
1582
                bdev_status.ldisk_status == constants.LDS_FAULTY),
1583
               self.EINSTANCEFAULTYDISK, instance,
1584
               "disk/%s on %s is faulty", idx, nname)
1585

    
1586
  def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1587
    """Verify if there are any unknown volumes in the cluster.
1588

1589
    The .os, .swap and backup volumes are ignored. All other volumes are
1590
    reported as unknown.
1591

1592
    @type reserved: L{ganeti.utils.FieldSet}
1593
    @param reserved: a FieldSet of reserved volume names
1594

1595
    """
1596
    for node, n_img in node_image.items():
1597
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1598
        # skip non-healthy nodes
1599
        continue
1600
      for volume in n_img.volumes:
1601
        test = ((node not in node_vol_should or
1602
                volume not in node_vol_should[node]) and
1603
                not reserved.Matches(volume))
1604
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1605
                      "volume %s is unknown", volume)
1606

    
1607
  def _VerifyOrphanInstances(self, instancelist, node_image):
1608
    """Verify the list of running instances.
1609

1610
    This checks what instances are running but unknown to the cluster.
1611

1612
    """
1613
    for node, n_img in node_image.items():
1614
      for o_inst in n_img.instances:
1615
        test = o_inst not in instancelist
1616
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1617
                      "instance %s on node %s should not exist", o_inst, node)
1618

    
1619
  def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1620
    """Verify N+1 Memory Resilience.
1621

1622
    Check that if one single node dies we can still start all the
1623
    instances it was primary for.
1624

1625
    """
1626
    cluster_info = self.cfg.GetClusterInfo()
1627
    for node, n_img in node_image.items():
1628
      # This code checks that every node which is now listed as
1629
      # secondary has enough memory to host all instances it is
1630
      # supposed to should a single other node in the cluster fail.
1631
      # FIXME: not ready for failover to an arbitrary node
1632
      # FIXME: does not support file-backed instances
1633
      # WARNING: we currently take into account down instances as well
1634
      # as up ones, considering that even if they're down someone
1635
      # might want to start them even in the event of a node failure.
1636
      if n_img.offline:
1637
        # we're skipping offline nodes from the N+1 warning, since
1638
        # most likely we don't have good memory information from them;
1639
        # we already list instances living on such nodes, and that's
1640
        # enough warning
1641
        continue
1642
      for prinode, instances in n_img.sbp.items():
1643
        needed_mem = 0
1644
        for instance in instances:
1645
          bep = cluster_info.FillBE(instance_cfg[instance])
1646
          if bep[constants.BE_AUTO_BALANCE]:
1647
            needed_mem += bep[constants.BE_MEMORY]
1648
        test = n_img.mfree < needed_mem
1649
        self._ErrorIf(test, self.ENODEN1, node,
1650
                      "not enough memory to accomodate instance failovers"
1651
                      " should node %s fail", prinode)
1652

    
1653
  def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1654
                       master_files):
1655
    """Verifies and computes the node required file checksums.
1656

1657
    @type ninfo: L{objects.Node}
1658
    @param ninfo: the node to check
1659
    @param nresult: the remote results for the node
1660
    @param file_list: required list of files
1661
    @param local_cksum: dictionary of local files and their checksums
1662
    @param master_files: list of files that only masters should have
1663

1664
    """
1665
    node = ninfo.name
1666
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1667

    
1668
    remote_cksum = nresult.get(constants.NV_FILELIST, None)
1669
    test = not isinstance(remote_cksum, dict)
1670
    _ErrorIf(test, self.ENODEFILECHECK, node,
1671
             "node hasn't returned file checksum data")
1672
    if test:
1673
      return
1674

    
1675
    for file_name in file_list:
1676
      node_is_mc = ninfo.master_candidate
1677
      must_have = (file_name not in master_files) or node_is_mc
1678
      # missing
1679
      test1 = file_name not in remote_cksum
1680
      # invalid checksum
1681
      test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1682
      # existing and good
1683
      test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1684
      _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1685
               "file '%s' missing", file_name)
1686
      _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1687
               "file '%s' has wrong checksum", file_name)
1688
      # not candidate and this is not a must-have file
1689
      _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1690
               "file '%s' should not exist on non master"
1691
               " candidates (and the file is outdated)", file_name)
1692
      # all good, except non-master/non-must have combination
1693
      _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1694
               "file '%s' should not exist"
1695
               " on non master candidates", file_name)
1696

    
1697
  def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1698
                      drbd_map):
1699
    """Verifies and the node DRBD status.
1700

1701
    @type ninfo: L{objects.Node}
1702
    @param ninfo: the node to check
1703
    @param nresult: the remote results for the node
1704
    @param instanceinfo: the dict of instances
1705
    @param drbd_helper: the configured DRBD usermode helper
1706
    @param drbd_map: the DRBD map as returned by
1707
        L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1708

1709
    """
1710
    node = ninfo.name
1711
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1712

    
1713
    if drbd_helper:
1714
      helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1715
      test = (helper_result is None)
1716
      _ErrorIf(test, self.ENODEDRBDHELPER, node,
1717
               "no drbd usermode helper returned")
1718
      if helper_result:
1719
        status, payload = helper_result
1720
        test = not status
1721
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1722
                 "drbd usermode helper check unsuccessful: %s", payload)
1723
        test = status and (payload != drbd_helper)
1724
        _ErrorIf(test, self.ENODEDRBDHELPER, node,
1725
                 "wrong drbd usermode helper: %s", payload)
1726

    
1727
    # compute the DRBD minors
1728
    node_drbd = {}
1729
    for minor, instance in drbd_map[node].items():
1730
      test = instance not in instanceinfo
1731
      _ErrorIf(test, self.ECLUSTERCFG, None,
1732
               "ghost instance '%s' in temporary DRBD map", instance)
1733
        # ghost instance should not be running, but otherwise we
1734
        # don't give double warnings (both ghost instance and
1735
        # unallocated minor in use)
1736
      if test:
1737
        node_drbd[minor] = (instance, False)
1738
      else:
1739
        instance = instanceinfo[instance]
1740
        node_drbd[minor] = (instance.name, instance.admin_up)
1741

    
1742
    # and now check them
1743
    used_minors = nresult.get(constants.NV_DRBDLIST, [])
1744
    test = not isinstance(used_minors, (tuple, list))
1745
    _ErrorIf(test, self.ENODEDRBD, node,
1746
             "cannot parse drbd status file: %s", str(used_minors))
1747
    if test:
1748
      # we cannot check drbd status
1749
      return
1750

    
1751
    for minor, (iname, must_exist) in node_drbd.items():
1752
      test = minor not in used_minors and must_exist
1753
      _ErrorIf(test, self.ENODEDRBD, node,
1754
               "drbd minor %d of instance %s is not active", minor, iname)
1755
    for minor in used_minors:
1756
      test = minor not in node_drbd
1757
      _ErrorIf(test, self.ENODEDRBD, node,
1758
               "unallocated drbd minor %d is in use", minor)
1759

    
1760
  def _UpdateNodeOS(self, ninfo, nresult, nimg):
1761
    """Builds the node OS structures.
1762

1763
    @type ninfo: L{objects.Node}
1764
    @param ninfo: the node to check
1765
    @param nresult: the remote results for the node
1766
    @param nimg: the node image object
1767

1768
    """
1769
    node = ninfo.name
1770
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1771

    
1772
    remote_os = nresult.get(constants.NV_OSLIST, None)
1773
    test = (not isinstance(remote_os, list) or
1774
            not compat.all(isinstance(v, list) and len(v) == 7
1775
                           for v in remote_os))
1776

    
1777
    _ErrorIf(test, self.ENODEOS, node,
1778
             "node hasn't returned valid OS data")
1779

    
1780
    nimg.os_fail = test
1781

    
1782
    if test:
1783
      return
1784

    
1785
    os_dict = {}
1786

    
1787
    for (name, os_path, status, diagnose,
1788
         variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1789

    
1790
      if name not in os_dict:
1791
        os_dict[name] = []
1792

    
1793
      # parameters is a list of lists instead of list of tuples due to
1794
      # JSON lacking a real tuple type, fix it:
1795
      parameters = [tuple(v) for v in parameters]
1796
      os_dict[name].append((os_path, status, diagnose,
1797
                            set(variants), set(parameters), set(api_ver)))
1798

    
1799
    nimg.oslist = os_dict
1800

    
1801
  def _VerifyNodeOS(self, ninfo, nimg, base):
1802
    """Verifies the node OS list.
1803

1804
    @type ninfo: L{objects.Node}
1805
    @param ninfo: the node to check
1806
    @param nimg: the node image object
1807
    @param base: the 'template' node we match against (e.g. from the master)
1808

1809
    """
1810
    node = ninfo.name
1811
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1812

    
1813
    assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1814

    
1815
    for os_name, os_data in nimg.oslist.items():
1816
      assert os_data, "Empty OS status for OS %s?!" % os_name
1817
      f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1818
      _ErrorIf(not f_status, self.ENODEOS, node,
1819
               "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1820
      _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1821
               "OS '%s' has multiple entries (first one shadows the rest): %s",
1822
               os_name, utils.CommaJoin([v[0] for v in os_data]))
1823
      # this will be caught in the backend too
1824
      _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1825
               and not f_var, self.ENODEOS, node,
1826
               "OS %s with API at least %d does not declare any variant",
1827
               os_name, constants.OS_API_V15)
1828
      # comparisons with the 'base' image
1829
      test = os_name not in base.oslist
1830
      _ErrorIf(test, self.ENODEOS, node,
1831
               "Extra OS %s not present on reference node (%s)",
1832
               os_name, base.name)
1833
      if test:
1834
        continue
1835
      assert base.oslist[os_name], "Base node has empty OS status?"
1836
      _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1837
      if not b_status:
1838
        # base OS is invalid, skipping
1839
        continue
1840
      for kind, a, b in [("API version", f_api, b_api),
1841
                         ("variants list", f_var, b_var),
1842
                         ("parameters", f_param, b_param)]:
1843
        _ErrorIf(a != b, self.ENODEOS, node,
1844
                 "OS %s %s differs from reference node %s: %s vs. %s",
1845
                 kind, os_name, base.name,
1846
                 utils.CommaJoin(a), utils.CommaJoin(b))
1847

    
1848
    # check any missing OSes
1849
    missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1850
    _ErrorIf(missing, self.ENODEOS, node,
1851
             "OSes present on reference node %s but missing on this node: %s",
1852
             base.name, utils.CommaJoin(missing))
1853

    
1854
  def _VerifyOob(self, ninfo, nresult):
1855
    """Verifies out of band functionality of a node.
1856

1857
    @type ninfo: L{objects.Node}
1858
    @param ninfo: the node to check
1859
    @param nresult: the remote results for the node
1860

1861
    """
1862
    node = ninfo.name
1863
    # We just have to verify the paths on master and/or master candidates
1864
    # as the oob helper is invoked on the master
1865
    if ((ninfo.master_candidate or ninfo.master_capable) and
1866
        constants.NV_OOB_PATHS in nresult):
1867
      for path_result in nresult[constants.NV_OOB_PATHS]:
1868
        self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1869

    
1870
  def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1871
    """Verifies and updates the node volume data.
1872

1873
    This function will update a L{NodeImage}'s internal structures
1874
    with data from the remote call.
1875

1876
    @type ninfo: L{objects.Node}
1877
    @param ninfo: the node to check
1878
    @param nresult: the remote results for the node
1879
    @param nimg: the node image object
1880
    @param vg_name: the configured VG name
1881

1882
    """
1883
    node = ninfo.name
1884
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1885

    
1886
    nimg.lvm_fail = True
1887
    lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1888
    if vg_name is None:
1889
      pass
1890
    elif isinstance(lvdata, basestring):
1891
      _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1892
               utils.SafeEncode(lvdata))
1893
    elif not isinstance(lvdata, dict):
1894
      _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1895
    else:
1896
      nimg.volumes = lvdata
1897
      nimg.lvm_fail = False
1898

    
1899
  def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1900
    """Verifies and updates the node instance list.
1901

1902
    If the listing was successful, then updates this node's instance
1903
    list. Otherwise, it marks the RPC call as failed for the instance
1904
    list key.
1905

1906
    @type ninfo: L{objects.Node}
1907
    @param ninfo: the node to check
1908
    @param nresult: the remote results for the node
1909
    @param nimg: the node image object
1910

1911
    """
1912
    idata = nresult.get(constants.NV_INSTANCELIST, None)
1913
    test = not isinstance(idata, list)
1914
    self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1915
                  " (instancelist): %s", utils.SafeEncode(str(idata)))
1916
    if test:
1917
      nimg.hyp_fail = True
1918
    else:
1919
      nimg.instances = idata
1920

    
1921
  def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1922
    """Verifies and computes a node information map
1923

1924
    @type ninfo: L{objects.Node}
1925
    @param ninfo: the node to check
1926
    @param nresult: the remote results for the node
1927
    @param nimg: the node image object
1928
    @param vg_name: the configured VG name
1929

1930
    """
1931
    node = ninfo.name
1932
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1933

    
1934
    # try to read free memory (from the hypervisor)
1935
    hv_info = nresult.get(constants.NV_HVINFO, None)
1936
    test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1937
    _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1938
    if not test:
1939
      try:
1940
        nimg.mfree = int(hv_info["memory_free"])
1941
      except (ValueError, TypeError):
1942
        _ErrorIf(True, self.ENODERPC, node,
1943
                 "node returned invalid nodeinfo, check hypervisor")
1944

    
1945
    # FIXME: devise a free space model for file based instances as well
1946
    if vg_name is not None:
1947
      test = (constants.NV_VGLIST not in nresult or
1948
              vg_name not in nresult[constants.NV_VGLIST])
1949
      _ErrorIf(test, self.ENODELVM, node,
1950
               "node didn't return data for the volume group '%s'"
1951
               " - it is either missing or broken", vg_name)
1952
      if not test:
1953
        try:
1954
          nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1955
        except (ValueError, TypeError):
1956
          _ErrorIf(True, self.ENODERPC, node,
1957
                   "node returned invalid LVM info, check LVM status")
1958

    
1959
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1960
    """Gets per-disk status information for all instances.
1961

1962
    @type nodelist: list of strings
1963
    @param nodelist: Node names
1964
    @type node_image: dict of (name, L{objects.Node})
1965
    @param node_image: Node objects
1966
    @type instanceinfo: dict of (name, L{objects.Instance})
1967
    @param instanceinfo: Instance objects
1968
    @rtype: {instance: {node: [(success, payload)]}}
1969
    @return: a dictionary of per-instance dictionaries with nodes as
1970
        keys and disk information as values; the disk information is a
1971
        list of tuples (success, payload)
1972

1973
    """
1974
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1975

    
1976
    node_disks = {}
1977
    node_disks_devonly = {}
1978
    diskless_instances = set()
1979
    diskless = constants.DT_DISKLESS
1980

    
1981
    for nname in nodelist:
1982
      node_instances = list(itertools.chain(node_image[nname].pinst,
1983
                                            node_image[nname].sinst))
1984
      diskless_instances.update(inst for inst in node_instances
1985
                                if instanceinfo[inst].disk_template == diskless)
1986
      disks = [(inst, disk)
1987
               for inst in node_instances
1988
               for disk in instanceinfo[inst].disks]
1989

    
1990
      if not disks:
1991
        # No need to collect data
1992
        continue
1993

    
1994
      node_disks[nname] = disks
1995

    
1996
      # Creating copies as SetDiskID below will modify the objects and that can
1997
      # lead to incorrect data returned from nodes
1998
      devonly = [dev.Copy() for (_, dev) in disks]
1999

    
2000
      for dev in devonly:
2001
        self.cfg.SetDiskID(dev, nname)
2002

    
2003
      node_disks_devonly[nname] = devonly
2004

    
2005
    assert len(node_disks) == len(node_disks_devonly)
2006

    
2007
    # Collect data from all nodes with disks
2008
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2009
                                                          node_disks_devonly)
2010

    
2011
    assert len(result) == len(node_disks)
2012

    
2013
    instdisk = {}
2014

    
2015
    for (nname, nres) in result.items():
2016
      disks = node_disks[nname]
2017

    
2018
      if nres.offline:
2019
        # No data from this node
2020
        data = len(disks) * [(False, "node offline")]
2021
      else:
2022
        msg = nres.fail_msg
2023
        _ErrorIf(msg, self.ENODERPC, nname,
2024
                 "while getting disk information: %s", msg)
2025
        if msg:
2026
          # No data from this node
2027
          data = len(disks) * [(False, msg)]
2028
        else:
2029
          data = []
2030
          for idx, i in enumerate(nres.payload):
2031
            if isinstance(i, (tuple, list)) and len(i) == 2:
2032
              data.append(i)
2033
            else:
2034
              logging.warning("Invalid result from node %s, entry %d: %s",
2035
                              nname, idx, i)
2036
              data.append((False, "Invalid result from the remote node"))
2037

    
2038
      for ((inst, _), status) in zip(disks, data):
2039
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2040

    
2041
    # Add empty entries for diskless instances.
2042
    for inst in diskless_instances:
2043
      assert inst not in instdisk
2044
      instdisk[inst] = {}
2045

    
2046
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2047
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
2048
                      compat.all(isinstance(s, (tuple, list)) and
2049
                                 len(s) == 2 for s in statuses)
2050
                      for inst, nnames in instdisk.items()
2051
                      for nname, statuses in nnames.items())
2052
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2053

    
2054
    return instdisk
2055

    
2056
  def _VerifyHVP(self, hvp_data):
2057
    """Verifies locally the syntax of the hypervisor parameters.
2058

2059
    """
2060
    for item, hv_name, hv_params in hvp_data:
2061
      msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2062
             (item, hv_name))
2063
      try:
2064
        hv_class = hypervisor.GetHypervisor(hv_name)
2065
        utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2066
        hv_class.CheckParameterSyntax(hv_params)
2067
      except errors.GenericError, err:
2068
        self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2069

    
2070

    
2071
  def BuildHooksEnv(self):
2072
    """Build hooks env.
2073

2074
    Cluster-Verify hooks just ran in the post phase and their failure makes
2075
    the output be logged in the verify output and the verification to fail.
2076

2077
    """
2078
    all_nodes = self.cfg.GetNodeList()
2079
    env = {
2080
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2081
      }
2082
    for node in self.cfg.GetAllNodesInfo().values():
2083
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2084

    
2085
    return env, [], all_nodes
2086

    
2087
  def Exec(self, feedback_fn):
2088
    """Verify integrity of cluster, performing various test on nodes.
2089

2090
    """
2091
    # This method has too many local variables. pylint: disable-msg=R0914
2092
    self.bad = False
2093
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2094
    verbose = self.op.verbose
2095
    self._feedback_fn = feedback_fn
2096
    feedback_fn("* Verifying global settings")
2097
    for msg in self.cfg.VerifyConfig():
2098
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2099

    
2100
    # Check the cluster certificates
2101
    for cert_filename in constants.ALL_CERT_FILES:
2102
      (errcode, msg) = _VerifyCertificate(cert_filename)
2103
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2104

    
2105
    vg_name = self.cfg.GetVGName()
2106
    drbd_helper = self.cfg.GetDRBDHelper()
2107
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2108
    cluster = self.cfg.GetClusterInfo()
2109
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
2110
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2111
    nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2112
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2113
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2114
                        for iname in instancelist)
2115
    groupinfo = self.cfg.GetAllNodeGroupsInfo()
2116
    i_non_redundant = [] # Non redundant instances
2117
    i_non_a_balanced = [] # Non auto-balanced instances
2118
    n_offline = 0 # Count of offline nodes
2119
    n_drained = 0 # Count of nodes being drained
2120
    node_vol_should = {}
2121

    
2122
    # FIXME: verify OS list
2123
    # do local checksums
2124
    master_files = [constants.CLUSTER_CONF_FILE]
2125
    master_node = self.master_node = self.cfg.GetMasterNode()
2126
    master_ip = self.cfg.GetMasterIP()
2127

    
2128
    file_names = ssconf.SimpleStore().GetFileList()
2129
    file_names.extend(constants.ALL_CERT_FILES)
2130
    file_names.extend(master_files)
2131
    if cluster.modify_etc_hosts:
2132
      file_names.append(constants.ETC_HOSTS)
2133

    
2134
    local_checksums = utils.FingerprintFiles(file_names)
2135

    
2136
    # Compute the set of hypervisor parameters
2137
    hvp_data = []
2138
    for hv_name in hypervisors:
2139
      hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2140
    for os_name, os_hvp in cluster.os_hvp.items():
2141
      for hv_name, hv_params in os_hvp.items():
2142
        if not hv_params:
2143
          continue
2144
        full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2145
        hvp_data.append(("os %s" % os_name, hv_name, full_params))
2146
    # TODO: collapse identical parameter values in a single one
2147
    for instance in instanceinfo.values():
2148
      if not instance.hvparams:
2149
        continue
2150
      hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2151
                       cluster.FillHV(instance)))
2152
    # and verify them locally
2153
    self._VerifyHVP(hvp_data)
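    # hvp_data is a list of (source, hypervisor, parameters) tuples, e.g. a
    # made-up entry ("instance web1.example.com", "xen-pvm", {...}); the
    # syntax is checked locally here and the same list is also shipped to
    # the nodes below via constants.NV_HVPARAMS.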
2154

    
2155
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2156
    node_verify_param = {
2157
      constants.NV_FILELIST: file_names,
2158
      constants.NV_NODELIST: [node.name for node in nodeinfo
2159
                              if not node.offline],
2160
      constants.NV_HYPERVISOR: hypervisors,
2161
      constants.NV_HVPARAMS: hvp_data,
2162
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2163
                                  node.secondary_ip) for node in nodeinfo
2164
                                 if not node.offline],
2165
      constants.NV_INSTANCELIST: hypervisors,
2166
      constants.NV_VERSION: None,
2167
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2168
      constants.NV_NODESETUP: None,
2169
      constants.NV_TIME: None,
2170
      constants.NV_MASTERIP: (master_node, master_ip),
2171
      constants.NV_OSLIST: None,
2172
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2173
      }
2174

    
2175
    if vg_name is not None:
2176
      node_verify_param[constants.NV_VGLIST] = None
2177
      node_verify_param[constants.NV_LVLIST] = vg_name
2178
      node_verify_param[constants.NV_PVLIST] = [vg_name]
2179
      node_verify_param[constants.NV_DRBDLIST] = None
2180

    
2181
    if drbd_helper:
2182
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2183

    
2184
    # Build our expected cluster state
2185
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
2186
                                                 name=node.name,
2187
                                                 vm_capable=node.vm_capable))
2188
                      for node in nodeinfo)
2189

    
2190
    # Gather OOB paths
2191
    oob_paths = []
2192
    for node in nodeinfo:
2193
      path = _SupportsOob(self.cfg, node)
2194
      if path and path not in oob_paths:
2195
        oob_paths.append(path)
2196

    
2197
    if oob_paths:
2198
      node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2199

    
2200
    for instance in instancelist:
2201
      inst_config = instanceinfo[instance]
2202

    
2203
      for nname in inst_config.all_nodes:
2204
        if nname not in node_image:
2205
          # ghost node
2206
          gnode = self.NodeImage(name=nname)
2207
          gnode.ghost = True
2208
          node_image[nname] = gnode
2209

    
2210
      inst_config.MapLVsByNode(node_vol_should)
2211

    
2212
      pnode = inst_config.primary_node
2213
      node_image[pnode].pinst.append(instance)
2214

    
2215
      for snode in inst_config.secondary_nodes:
2216
        nimg = node_image[snode]
2217
        nimg.sinst.append(instance)
2218
        if pnode not in nimg.sbp:
2219
          nimg.sbp[pnode] = []
2220
        nimg.sbp[pnode].append(instance)
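    # After these loops, nimg.sbp maps a primary node to the instances that
    # use this node as secondary, e.g. (hypothetical)
    # node_image["node2"].sbp == {"node1": ["inst1", "inst2"]}; this is the
    # structure _VerifyNPlusOneMemory consumes later.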
2221

    
2222
    # At this point, we have the in-memory data structures complete,
2223
    # except for the runtime information, which we'll gather next
2224

    
2225
    # Due to the way our RPC system works, exact response times cannot be
2226
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2227
    # time before and after executing the request, we can at least have a time
2228
    # window.
2229
    nvinfo_starttime = time.time()
2230
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2231
                                           self.cfg.GetClusterName())
2232
    nvinfo_endtime = time.time()
2233

    
2234
    all_drbd_map = self.cfg.ComputeDRBDMap()
2235

    
2236
    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2237
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2238

    
2239
    feedback_fn("* Verifying node status")
2240

    
2241
    refos_img = None
2242

    
2243
    for node_i in nodeinfo:
2244
      node = node_i.name
2245
      nimg = node_image[node]
2246

    
2247
      if node_i.offline:
2248
        if verbose:
2249
          feedback_fn("* Skipping offline node %s" % (node,))
2250
        n_offline += 1
2251
        continue
2252

    
2253
      if node == master_node:
2254
        ntype = "master"
2255
      elif node_i.master_candidate:
2256
        ntype = "master candidate"
2257
      elif node_i.drained:
2258
        ntype = "drained"
2259
        n_drained += 1
2260
      else:
2261
        ntype = "regular"
2262
      if verbose:
2263
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2264

    
2265
      msg = all_nvinfo[node].fail_msg
2266
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2267
      if msg:
2268
        nimg.rpc_fail = True
2269
        continue
2270

    
2271
      nresult = all_nvinfo[node].payload
2272

    
2273
      nimg.call_ok = self._VerifyNode(node_i, nresult)
2274
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2275
      self._VerifyNodeNetwork(node_i, nresult)
2276
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2277
                            master_files)
2278

    
2279
      self._VerifyOob(node_i, nresult)
2280

    
2281
      if nimg.vm_capable:
2282
        self._VerifyNodeLVM(node_i, nresult, vg_name)
2283
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2284
                             all_drbd_map)
2285

    
2286
        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2287
        self._UpdateNodeInstances(node_i, nresult, nimg)
2288
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2289
        self._UpdateNodeOS(node_i, nresult, nimg)
2290
        if not nimg.os_fail:
2291
          if refos_img is None:
2292
            refos_img = nimg
2293
          self._VerifyNodeOS(node_i, nimg, refos_img)
2294

    
2295
    feedback_fn("* Verifying instance status")
2296
    for instance in instancelist:
2297
      if verbose:
2298
        feedback_fn("* Verifying instance %s" % instance)
2299
      inst_config = instanceinfo[instance]
2300
      self._VerifyInstance(instance, inst_config, node_image,
2301
                           instdisk[instance])
2302
      inst_nodes_offline = []
2303

    
2304
      pnode = inst_config.primary_node
2305
      pnode_img = node_image[pnode]
2306
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2307
               self.ENODERPC, pnode, "instance %s, connection to"
2308
               " primary node failed", instance)
2309

    
2310
      _ErrorIf(pnode_img.offline, self.EINSTANCEBADNODE, instance,
2311
               "instance lives on offline node %s", inst_config.primary_node)
2312

    
2313
      # If the instance is non-redundant we cannot survive losing its primary
2314
      # node, so we are not N+1 compliant. On the other hand we have no disk
2315
      # templates with more than one secondary so that situation is not well
2316
      # supported either.
2317
      # FIXME: does not support file-backed instances
2318
      if not inst_config.secondary_nodes:
2319
        i_non_redundant.append(instance)
2320

    
2321
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2322
               instance, "instance has multiple secondary nodes: %s",
2323
               utils.CommaJoin(inst_config.secondary_nodes),
2324
               code=self.ETYPE_WARNING)
2325

    
2326
      if inst_config.disk_template in constants.DTS_INT_MIRROR:
2327
        pnode = inst_config.primary_node
2328
        instance_nodes = utils.NiceSort(inst_config.all_nodes)
2329
        instance_groups = {}
2330

    
2331
        for node in instance_nodes:
2332
          instance_groups.setdefault(nodeinfo_byname[node].group,
2333
                                     []).append(node)
2334

    
2335
        pretty_list = [
2336
          "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2337
          # Sort so that we always list the primary node first.
2338
          for group, nodes in sorted(instance_groups.items(),
2339
                                     key=lambda (_, nodes): pnode in nodes,
2340
                                     reverse=True)]
2341

    
2342
        self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2343
                      instance, "instance has primary and secondary nodes in"
2344
                      " different groups: %s", utils.CommaJoin(pretty_list),
2345
                      code=self.ETYPE_WARNING)
2346

    
2347
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2348
        i_non_a_balanced.append(instance)
2349

    
2350
      for snode in inst_config.secondary_nodes:
2351
        s_img = node_image[snode]
2352
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2353
                 "instance %s, connection to secondary node failed", instance)
2354

    
2355
        if s_img.offline:
2356
          inst_nodes_offline.append(snode)
2357

    
2358
      # warn that the instance lives on offline nodes
2359
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2360
               "instance has offline secondary node(s) %s",
2361
               utils.CommaJoin(inst_nodes_offline))
2362
      # ... or ghost/non-vm_capable nodes
2363
      for node in inst_config.all_nodes:
2364
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2365
                 "instance lives on ghost node %s", node)
2366
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2367
                 instance, "instance lives on non-vm_capable node %s", node)
2368

    
2369
    feedback_fn("* Verifying orphan volumes")
2370
    reserved = utils.FieldSet(*cluster.reserved_lvs)
2371
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2372

    
2373
    feedback_fn("* Verifying orphan instances")
2374
    self._VerifyOrphanInstances(instancelist, node_image)
2375

    
2376
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2377
      feedback_fn("* Verifying N+1 Memory redundancy")
2378
      self._VerifyNPlusOneMemory(node_image, instanceinfo)
2379

    
2380
    feedback_fn("* Other Notes")
2381
    if i_non_redundant:
2382
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2383
                  % len(i_non_redundant))
2384

    
2385
    if i_non_a_balanced:
2386
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2387
                  % len(i_non_a_balanced))
2388

    
2389
    if n_offline:
2390
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2391

    
2392
    if n_drained:
2393
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2394

    
2395
    return not self.bad
2396

    
2397
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2398
    """Analyze the post-hooks' result
2399

2400
    This method analyses the hook result, handles it, and sends some
2401
    nicely-formatted feedback back to the user.
2402

2403
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
2404
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2405
    @param hooks_results: the results of the multi-node hooks rpc call
2406
    @param feedback_fn: function used to send feedback back to the caller
2407
    @param lu_result: previous Exec result
2408
    @return: the new Exec result, based on the previous result
2409
        and hook results
2410

2411
    """
2412
    # We only really run POST phase hooks, and are only interested in
2413
    # their results
2414
    if phase == constants.HOOKS_PHASE_POST:
2415
      # Used to change hooks' output to proper indentation
2416
      feedback_fn("* Hooks Results")
2417
      assert hooks_results, "invalid result from hooks"
2418

    
2419
      for node_name in hooks_results:
2420
        res = hooks_results[node_name]
2421
        msg = res.fail_msg
2422
        test = msg and not res.offline
2423
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
2424
                      "Communication failure in hooks execution: %s", msg)
2425
        if res.offline or msg:
2426
          # No need to investigate payload if node is offline or gave an error.
2427
          # override manually lu_result here as _ErrorIf only
2428
          # overrides self.bad
2429
          lu_result = 1
2430
          continue
2431
        for script, hkr, output in res.payload:
2432
          test = hkr == constants.HKR_FAIL
2433
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
2434
                        "Script %s failed, output:", script)
2435
          if test:
2436
            output = self._HOOKS_INDENT_RE.sub('      ', output)
2437
            feedback_fn("%s" % output)
2438
            lu_result = 0
2439

    
2440
      return lu_result
2441

    
2442

    
2443
class LUClusterVerifyDisks(NoHooksLU):
2444
  """Verifies the cluster disks status.
2445

2446
  """
2447
  REQ_BGL = False
2448

    
2449
  def ExpandNames(self):
2450
    self.needed_locks = {
2451
      locking.LEVEL_NODE: locking.ALL_SET,
2452
      locking.LEVEL_INSTANCE: locking.ALL_SET,
2453
    }
2454
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2455

    
2456
  def Exec(self, feedback_fn):
2457
    """Verify integrity of cluster disks.
2458

2459
    @rtype: tuple of three items
2460
    @return: a tuple of (dict of node-to-node_error, list of instances
2461
        which need activate-disks, dict of instance: (node, volume) for
2462
        missing volumes)
2463

2464
    """
2465
    result = res_nodes, res_instances, res_missing = {}, [], {}
2466

    
2467
    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2468
    instances = self.cfg.GetAllInstancesInfo().values()
2469

    
2470
    nv_dict = {}
2471
    for inst in instances:
2472
      inst_lvs = {}
2473
      if not inst.admin_up:
2474
        continue
2475
      inst.MapLVsByNode(inst_lvs)
2476
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2477
      for node, vol_list in inst_lvs.iteritems():
2478
        for vol in vol_list:
2479
          nv_dict[(node, vol)] = inst
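    # At this point nv_dict maps (node, volume) pairs back to the owning
    # instance object, e.g. a made-up entry
    # {("node1.example.com", "xenvg/disk0"): <Instance inst1>}; any entry
    # still left after querying the nodes is reported as missing.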
2480

    
2481
    if not nv_dict:
2482
      return result
2483

    
2484
    node_lvs = self.rpc.call_lv_list(nodes, [])
2485
    for node, node_res in node_lvs.items():
2486
      if node_res.offline:
2487
        continue
2488
      msg = node_res.fail_msg
2489
      if msg:
2490
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2491
        res_nodes[node] = msg
2492
        continue
2493

    
2494
      lvs = node_res.payload
2495
      for lv_name, (_, _, lv_online) in lvs.items():
2496
        inst = nv_dict.pop((node, lv_name), None)
2497
        if (not lv_online and inst is not None
2498
            and inst.name not in res_instances):
2499
          res_instances.append(inst.name)
2500

    
2501
    # any leftover items in nv_dict are missing LVs, let's arrange the
2502
    # data better
2503
    for key, inst in nv_dict.iteritems():
2504
      if inst.name not in res_missing:
2505
        res_missing[inst.name] = []
2506
      res_missing[inst.name].append(key)
2507

    
2508
    return result
2509

    
2510

    
2511
class LUClusterRepairDiskSizes(NoHooksLU):
2512
  """Verifies the cluster disks sizes.
2513

2514
  """
2515
  REQ_BGL = False
2516

    
2517
  def ExpandNames(self):
2518
    if self.op.instances:
2519
      self.wanted_names = []
2520
      for name in self.op.instances:
2521
        full_name = _ExpandInstanceName(self.cfg, name)
2522
        self.wanted_names.append(full_name)
2523
      self.needed_locks = {
2524
        locking.LEVEL_NODE: [],
2525
        locking.LEVEL_INSTANCE: self.wanted_names,
2526
        }
2527
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2528
    else:
2529
      self.wanted_names = None
2530
      self.needed_locks = {
2531
        locking.LEVEL_NODE: locking.ALL_SET,
2532
        locking.LEVEL_INSTANCE: locking.ALL_SET,
2533
        }
2534
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2535

    
2536
  def DeclareLocks(self, level):
2537
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
2538
      self._LockInstancesNodes(primary_only=True)
2539

    
2540
  def CheckPrereq(self):
2541
    """Check prerequisites.
2542

2543
    This only checks the optional instance list against the existing names.
2544

2545
    """
2546
    if self.wanted_names is None:
2547
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2548

    
2549
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2550
                             in self.wanted_names]
2551

    
2552
  def _EnsureChildSizes(self, disk):
2553
    """Ensure children of the disk have the needed disk size.
2554

2555
    This is valid mainly for DRBD8 and fixes an issue where the
2556
    children have a smaller disk size.
2557

2558
    @param disk: an L{ganeti.objects.Disk} object
2559

2560
    """
2561
    if disk.dev_type == constants.LD_DRBD8:
2562
      assert disk.children, "Empty children for DRBD8?"
2563
      fchild = disk.children[0]
2564
      mismatch = fchild.size < disk.size
2565
      if mismatch:
2566
        self.LogInfo("Child disk has size %d, parent %d, fixing",
2567
                     fchild.size, disk.size)
2568
        fchild.size = disk.size
2569

    
2570
      # and we recurse on this child only, not on the metadev
2571
      return self._EnsureChildSizes(fchild) or mismatch
2572
    else:
2573
      return False
2574

    
2575
  def Exec(self, feedback_fn):
2576
    """Verify the size of cluster disks.
2577

2578
    """
2579
    # TODO: check child disks too
2580
    # TODO: check differences in size between primary/secondary nodes
2581
    per_node_disks = {}
2582
    for instance in self.wanted_instances:
2583
      pnode = instance.primary_node
2584
      if pnode not in per_node_disks:
2585
        per_node_disks[pnode] = []
2586
      for idx, disk in enumerate(instance.disks):
2587
        per_node_disks[pnode].append((instance, idx, disk))
2588

    
2589
    changed = []
2590
    for node, dskl in per_node_disks.items():
2591
      newl = [v[2].Copy() for v in dskl]
2592
      for dsk in newl:
2593
        self.cfg.SetDiskID(dsk, node)
2594
      result = self.rpc.call_blockdev_getsize(node, newl)
2595
      if result.fail_msg:
2596
        self.LogWarning("Failure in blockdev_getsize call to node"
2597
                        " %s, ignoring", node)
2598
        continue
2599
      if len(result.payload) != len(dskl):
2600
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
2601
                        " result.payload=%s", node, len(dskl), result.payload)
2602
        self.LogWarning("Invalid result from node %s, ignoring node results",
2603
                        node)
2604
        continue
2605
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
2606
        if size is None:
2607
          self.LogWarning("Disk %d of instance %s did not return size"
2608
                          " information, ignoring", idx, instance.name)
2609
          continue
2610
        if not isinstance(size, (int, long)):
2611
          self.LogWarning("Disk %d of instance %s did not return valid"
2612
                          " size information, ignoring", idx, instance.name)
2613
          continue
2614
        size = size >> 20
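        # the payload from blockdev_getsize is presumably in bytes, so the
        # shift above converts it to MiB to match disk.size,
        # e.g. 10737418240 >> 20 == 10240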
2615
        if size != disk.size:
2616
          self.LogInfo("Disk %d of instance %s has mismatched size,"
2617
                       " correcting: recorded %d, actual %d", idx,
2618
                       instance.name, disk.size, size)
2619
          disk.size = size
2620
          self.cfg.Update(instance, feedback_fn)
2621
          changed.append((instance.name, idx, size))
2622
        if self._EnsureChildSizes(disk):
2623
          self.cfg.Update(instance, feedback_fn)
2624
          changed.append((instance.name, idx, disk.size))
2625
    return changed
2626

    
2627

    
2628
class LUClusterRename(LogicalUnit):
2629
  """Rename the cluster.
2630

2631
  """
2632
  HPATH = "cluster-rename"
2633
  HTYPE = constants.HTYPE_CLUSTER
2634

    
2635
  def BuildHooksEnv(self):
2636
    """Build hooks env.
2637

2638
    """
2639
    env = {
2640
      "OP_TARGET": self.cfg.GetClusterName(),
2641
      "NEW_NAME": self.op.name,
2642
      }
2643
    mn = self.cfg.GetMasterNode()
2644
    all_nodes = self.cfg.GetNodeList()
2645
    return env, [mn], all_nodes
2646

    
2647
  def CheckPrereq(self):
2648
    """Verify that the passed name is a valid one.
2649

2650
    """
2651
    hostname = netutils.GetHostname(name=self.op.name,
2652
                                    family=self.cfg.GetPrimaryIPFamily())
2653

    
2654
    new_name = hostname.name
2655
    self.ip = new_ip = hostname.ip
2656
    old_name = self.cfg.GetClusterName()
2657
    old_ip = self.cfg.GetMasterIP()
2658
    if new_name == old_name and new_ip == old_ip:
2659
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
2660
                                 " cluster has changed",
2661
                                 errors.ECODE_INVAL)
2662
    if new_ip != old_ip:
2663
      if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2664
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
2665
                                   " reachable on the network" %
2666
                                   new_ip, errors.ECODE_NOTUNIQUE)
2667

    
2668
    self.op.name = new_name
2669

    
2670
  def Exec(self, feedback_fn):
2671
    """Rename the cluster.
2672

2673
    """
2674
    clustername = self.op.name
2675
    ip = self.ip
2676

    
2677
    # shutdown the master IP
2678
    master = self.cfg.GetMasterNode()
2679
    result = self.rpc.call_node_stop_master(master, False)
2680
    result.Raise("Could not disable the master role")
2681

    
2682
    try:
2683
      cluster = self.cfg.GetClusterInfo()
2684
      cluster.cluster_name = clustername
2685
      cluster.master_ip = ip
2686
      self.cfg.Update(cluster, feedback_fn)
2687

    
2688
      # update the known hosts file
2689
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2690
      node_list = self.cfg.GetOnlineNodeList()
2691
      try:
2692
        node_list.remove(master)
2693
      except ValueError:
2694
        pass
2695
      _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2696
    finally:
2697
      result = self.rpc.call_node_start_master(master, False, False)
2698
      msg = result.fail_msg
2699
      if msg:
2700
        self.LogWarning("Could not re-enable the master role on"
2701
                        " the master, please restart manually: %s", msg)
2702

    
2703
    return clustername
2704

    
2705

    
2706
class LUClusterSetParams(LogicalUnit):
2707
  """Change the parameters of the cluster.
2708

2709
  """
2710
  HPATH = "cluster-modify"
2711
  HTYPE = constants.HTYPE_CLUSTER
2712
  REQ_BGL = False
2713

    
2714
  def CheckArguments(self):
2715
    """Check parameters
2716

2717
    """
2718
    if self.op.uid_pool:
2719
      uidpool.CheckUidPool(self.op.uid_pool)
2720

    
2721
    if self.op.add_uids:
2722
      uidpool.CheckUidPool(self.op.add_uids)
2723

    
2724
    if self.op.remove_uids:
2725
      uidpool.CheckUidPool(self.op.remove_uids)
2726

    
2727
  def ExpandNames(self):
2728
    # FIXME: in the future maybe other cluster params won't require checking on
2729
    # all nodes to be modified.
2730
    self.needed_locks = {
2731
      locking.LEVEL_NODE: locking.ALL_SET,
2732
    }
2733
    self.share_locks[locking.LEVEL_NODE] = 1
2734

    
2735
  def BuildHooksEnv(self):
2736
    """Build hooks env.
2737

2738
    """
2739
    env = {
2740
      "OP_TARGET": self.cfg.GetClusterName(),
2741
      "NEW_VG_NAME": self.op.vg_name,
2742
      }
2743
    mn = self.cfg.GetMasterNode()
2744
    return env, [mn], [mn]
2745

    
2746
  def CheckPrereq(self):
2747
    """Check prerequisites.
2748

2749
    This checks whether the given params don't conflict and
2750
    if the given volume group is valid.
2751

2752
    """
2753
    if self.op.vg_name is not None and not self.op.vg_name:
2754
      if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2755
        raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2756
                                   " instances exist", errors.ECODE_INVAL)
2757

    
2758
    if self.op.drbd_helper is not None and not self.op.drbd_helper:
2759
      if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2760
        raise errors.OpPrereqError("Cannot disable drbd helper while"
2761
                                   " drbd-based instances exist",
2762
                                   errors.ECODE_INVAL)
2763

    
2764
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2765

    
2766
    # if vg_name not None, checks given volume group on all nodes
2767
    if self.op.vg_name:
2768
      vglist = self.rpc.call_vg_list(node_list)
2769
      for node in node_list:
2770
        msg = vglist[node].fail_msg
2771
        if msg:
2772
          # ignoring down node
2773
          self.LogWarning("Error while gathering data on node %s"
2774
                          " (ignoring node): %s", node, msg)
2775
          continue
2776
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2777
                                              self.op.vg_name,
2778
                                              constants.MIN_VG_SIZE)
2779
        if vgstatus:
2780
          raise errors.OpPrereqError("Error on node '%s': %s" %
2781
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2782

    
2783
    if self.op.drbd_helper:
2784
      # checks given drbd helper on all nodes
2785
      helpers = self.rpc.call_drbd_helper(node_list)
2786
      for node in node_list:
2787
        ninfo = self.cfg.GetNodeInfo(node)
2788
        if ninfo.offline:
2789
          self.LogInfo("Not checking drbd helper on offline node %s", node)
2790
          continue
2791
        msg = helpers[node].fail_msg
2792
        if msg:
2793
          raise errors.OpPrereqError("Error checking drbd helper on node"
2794
                                     " '%s': %s" % (node, msg),
2795
                                     errors.ECODE_ENVIRON)
2796
        node_helper = helpers[node].payload
2797
        if node_helper != self.op.drbd_helper:
2798
          raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2799
                                     (node, node_helper), errors.ECODE_ENVIRON)
2800

    
2801
    self.cluster = cluster = self.cfg.GetClusterInfo()
2802
    # validate params changes
2803
    if self.op.beparams:
2804
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2805
      self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2806

    
2807
    if self.op.ndparams:
2808
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2809
      self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2810

    
2811
    if self.op.nicparams:
2812
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2813
      self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2814
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2815
      nic_errors = []
2816

    
2817
      # check all instances for consistency
2818
      for instance in self.cfg.GetAllInstancesInfo().values():
2819
        for nic_idx, nic in enumerate(instance.nics):
2820
          params_copy = copy.deepcopy(nic.nicparams)
2821
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2822

    
2823
          # check parameter syntax
2824
          try:
2825
            objects.NIC.CheckParameterSyntax(params_filled)
2826
          except errors.ConfigurationError, err:
2827
            nic_errors.append("Instance %s, nic/%d: %s" %
2828
                              (instance.name, nic_idx, err))
2829

    
2830
          # if we're moving instances to routed, check that they have an ip
2831
          target_mode = params_filled[constants.NIC_MODE]
2832
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2833
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2834
                              (instance.name, nic_idx))
2835
      if nic_errors:
2836
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2837
                                   "\n".join(nic_errors))
2838

    
2839
    # hypervisor list/parameters
2840
    self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2841
    if self.op.hvparams:
2842
      for hv_name, hv_dict in self.op.hvparams.items():
2843
        if hv_name not in self.new_hvparams:
2844
          self.new_hvparams[hv_name] = hv_dict
2845
        else:
2846
          self.new_hvparams[hv_name].update(hv_dict)
2847

    
2848
    # os hypervisor parameters
2849
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2850
    if self.op.os_hvp:
2851
      for os_name, hvs in self.op.os_hvp.items():
2852
        if os_name not in self.new_os_hvp:
2853
          self.new_os_hvp[os_name] = hvs
2854
        else:
2855
          for hv_name, hv_dict in hvs.items():
2856
            if hv_name not in self.new_os_hvp[os_name]:
2857
              self.new_os_hvp[os_name][hv_name] = hv_dict
2858
            else:
2859
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2860

    
2861
    # os parameters
2862
    self.new_osp = objects.FillDict(cluster.osparams, {})
2863
    if self.op.osparams:
2864
      for os_name, osp in self.op.osparams.items():
2865
        if os_name not in self.new_osp:
2866
          self.new_osp[os_name] = {}
2867

    
2868
        self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2869
                                                  use_none=True)
2870

    
2871
        if not self.new_osp[os_name]:
2872
          # we removed all parameters
2873
          del self.new_osp[os_name]
2874
        else:
2875
          # check the parameter validity (remote check)
2876
          _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2877
                         os_name, self.new_osp[os_name])
2878

    
2879
    # changes to the hypervisor list
2880
    if self.op.enabled_hypervisors is not None:
2881
      self.hv_list = self.op.enabled_hypervisors
2882
      for hv in self.hv_list:
2883
        # if the hypervisor doesn't already exist in the cluster
2884
        # hvparams, we initialize it to empty, and then (in both
2885
        # cases) we make sure to fill the defaults, as we might not
2886
        # have a complete defaults list if the hypervisor wasn't
2887
        # enabled before
2888
        if hv not in new_hvp:
2889
          new_hvp[hv] = {}
2890
        new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2891
        utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2892
    else:
2893
      self.hv_list = cluster.enabled_hypervisors
2894

    
2895
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2896
      # either the enabled list has changed, or the parameters have, validate
2897
      for hv_name, hv_params in self.new_hvparams.items():
2898
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2899
            (self.op.enabled_hypervisors and
2900
             hv_name in self.op.enabled_hypervisors)):
2901
          # either this is a new hypervisor, or its parameters have changed
2902
          hv_class = hypervisor.GetHypervisor(hv_name)
2903
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2904
          hv_class.CheckParameterSyntax(hv_params)
2905
          _CheckHVParams(self, node_list, hv_name, hv_params)
2906

    
2907
    if self.op.os_hvp:
2908
      # no need to check any newly-enabled hypervisors, since the
2909
      # defaults have already been checked in the above code-block
2910
      for os_name, os_hvp in self.new_os_hvp.items():
2911
        for hv_name, hv_params in os_hvp.items():
2912
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2913
          # we need to fill in the new os_hvp on top of the actual hv_p
2914
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2915
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2916
          hv_class = hypervisor.GetHypervisor(hv_name)
2917
          hv_class.CheckParameterSyntax(new_osp)
2918
          _CheckHVParams(self, node_list, hv_name, new_osp)
2919

    
2920
    if self.op.default_iallocator:
2921
      alloc_script = utils.FindFile(self.op.default_iallocator,
2922
                                    constants.IALLOCATOR_SEARCH_PATH,
2923
                                    os.path.isfile)
2924
      if alloc_script is None:
2925
        raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2926
                                   " specified" % self.op.default_iallocator,
2927
                                   errors.ECODE_INVAL)
2928

    
2929
  def Exec(self, feedback_fn):
2930
    """Change the parameters of the cluster.
2931

2932
    """
2933
    if self.op.vg_name is not None:
2934
      new_volume = self.op.vg_name
2935
      if not new_volume:
2936
        new_volume = None
2937
      if new_volume != self.cfg.GetVGName():
2938
        self.cfg.SetVGName(new_volume)
2939
      else:
2940
        feedback_fn("Cluster LVM configuration already in desired"
2941
                    " state, not changing")
2942
    if self.op.drbd_helper is not None:
2943
      new_helper = self.op.drbd_helper
2944
      if not new_helper:
2945
        new_helper = None
2946
      if new_helper != self.cfg.GetDRBDHelper():
2947
        self.cfg.SetDRBDHelper(new_helper)
2948
      else:
2949
        feedback_fn("Cluster DRBD helper already in desired state,"
2950
                    " not changing")
2951
    if self.op.hvparams:
2952
      self.cluster.hvparams = self.new_hvparams
2953
    if self.op.os_hvp:
2954
      self.cluster.os_hvp = self.new_os_hvp
2955
    if self.op.enabled_hypervisors is not None:
2956
      self.cluster.hvparams = self.new_hvparams
2957
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2958
    if self.op.beparams:
2959
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2960
    if self.op.nicparams:
2961
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2962
    if self.op.osparams:
2963
      self.cluster.osparams = self.new_osp
2964
    if self.op.ndparams:
2965
      self.cluster.ndparams = self.new_ndparams
2966

    
2967
    if self.op.candidate_pool_size is not None:
2968
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2969
      # we need to update the pool size here, otherwise the save will fail
2970
      _AdjustCandidatePool(self, [])
2971

    
2972
    if self.op.maintain_node_health is not None:
2973
      self.cluster.maintain_node_health = self.op.maintain_node_health
2974

    
2975
    if self.op.prealloc_wipe_disks is not None:
2976
      self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2977

    
2978
    if self.op.add_uids is not None:
2979
      uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2980

    
2981
    if self.op.remove_uids is not None:
2982
      uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2983

    
2984
    if self.op.uid_pool is not None:
2985
      self.cluster.uid_pool = self.op.uid_pool
2986

    
2987
    if self.op.default_iallocator is not None:
2988
      self.cluster.default_iallocator = self.op.default_iallocator
2989

    
2990
    if self.op.reserved_lvs is not None:
2991
      self.cluster.reserved_lvs = self.op.reserved_lvs
2992

    
2993
    def helper_os(aname, mods, desc):
2994
      desc += " OS list"
2995
      lst = getattr(self.cluster, aname)
2996
      for key, val in mods:
2997
        if key == constants.DDM_ADD:
2998
          if val in lst:
2999
            feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3000
          else:
3001
            lst.append(val)
3002
        elif key == constants.DDM_REMOVE:
3003
          if val in lst:
3004
            lst.remove(val)
3005
          else:
3006
            feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3007
        else:
3008
          raise errors.ProgrammerError("Invalid modification '%s'" % key)
3009

    
3010
    if self.op.hidden_os:
3011
      helper_os("hidden_os", self.op.hidden_os, "hidden")
3012

    
3013
    if self.op.blacklisted_os:
3014
      helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3015

    
3016
    if self.op.master_netdev:
3017
      master = self.cfg.GetMasterNode()
3018
      feedback_fn("Shutting down master ip on the current netdev (%s)" %
3019
                  self.cluster.master_netdev)
3020
      result = self.rpc.call_node_stop_master(master, False)
3021
      result.Raise("Could not disable the master ip")
3022
      feedback_fn("Changing master_netdev from %s to %s" %
3023
                  (self.cluster.master_netdev, self.op.master_netdev))
3024
      self.cluster.master_netdev = self.op.master_netdev
3025

    
3026
    self.cfg.Update(self.cluster, feedback_fn)
3027

    
3028
    if self.op.master_netdev:
3029
      feedback_fn("Starting the master ip on the new master netdev (%s)" %
3030
                  self.op.master_netdev)
3031
      result = self.rpc.call_node_start_master(master, False, False)
3032
      if result.fail_msg:
3033
        self.LogWarning("Could not re-enable the master ip on"
3034
                        " the master, please restart manually: %s",
3035
                        result.fail_msg)
3036

    
3037

    
3038
def _UploadHelper(lu, nodes, fname):
3039
  """Helper for uploading a file and showing warnings.
3040

3041
  """
3042
  if os.path.exists(fname):
3043
    result = lu.rpc.call_upload_file(nodes, fname)
3044
    for to_node, to_result in result.items():
3045
      msg = to_result.fail_msg
3046
      if msg:
3047
        msg = ("Copy of file %s to node %s failed: %s" %
3048
               (fname, to_node, msg))
3049
        lu.proc.LogWarning(msg)
3050

    
3051

    
3052
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3053
  """Distribute additional files which are part of the cluster configuration.
3054

3055
  ConfigWriter takes care of distributing the config and ssconf files, but
3056
  there are more files which should be distributed to all nodes. This function
3057
  makes sure those are copied.
3058

3059
  @param lu: calling logical unit
3060
  @param additional_nodes: list of nodes not in the config to distribute to
3061
  @type additional_vm: boolean
3062
  @param additional_vm: whether the additional nodes are vm-capable or not
3063

3064
  """
3065
  # 1. Gather target nodes
3066
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3067
  dist_nodes = lu.cfg.GetOnlineNodeList()
3068
  nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
3069
  vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
3070
  if additional_nodes is not None:
3071
    dist_nodes.extend(additional_nodes)
3072
    if additional_vm:
3073
      vm_nodes.extend(additional_nodes)
3074
  if myself.name in dist_nodes:
3075
    dist_nodes.remove(myself.name)
3076
  if myself.name in vm_nodes:
3077
    vm_nodes.remove(myself.name)
3078

    
3079
  # 2. Gather files to distribute
3080
  dist_files = set([constants.ETC_HOSTS,
3081
                    constants.SSH_KNOWN_HOSTS_FILE,
3082
                    constants.RAPI_CERT_FILE,
3083
                    constants.RAPI_USERS_FILE,
3084
                    constants.CONFD_HMAC_KEY,
3085
                    constants.CLUSTER_DOMAIN_SECRET_FILE,
3086
                   ])
3087

    
3088
  vm_files = set()
3089
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
3090
  for hv_name in enabled_hypervisors:
3091
    hv_class = hypervisor.GetHypervisor(hv_name)
3092
    vm_files.update(hv_class.GetAncillaryFiles())
3093

    
3094
  # 3. Perform the files upload
3095
  for fname in dist_files:
3096
    _UploadHelper(lu, dist_nodes, fname)
3097
  for fname in vm_files:
3098
    _UploadHelper(lu, vm_nodes, fname)
3099

    
3100

    
3101
class LUClusterRedistConf(NoHooksLU):
3102
  """Force the redistribution of cluster configuration.
3103

3104
  This is a very simple LU.
3105

3106
  """
3107
  REQ_BGL = False
3108

    
3109
  def ExpandNames(self):
3110
    self.needed_locks = {
3111
      locking.LEVEL_NODE: locking.ALL_SET,
3112
    }
3113
    self.share_locks[locking.LEVEL_NODE] = 1
3114

    
3115
  def Exec(self, feedback_fn):
3116
    """Redistribute the configuration.
3117

3118
    """
3119
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3120
    _RedistributeAncillaryFiles(self)
3121

    
3122

    
3123
def _WaitForSync(lu, instance, disks=None, oneshot=False):
3124
  """Sleep and poll for an instance's disk to sync.
3125

3126
  """
3127
  if not instance.disks or disks is not None and not disks:
3128
    return True
3129

    
3130
  disks = _ExpandCheckDisks(instance, disks)
3131

    
3132
  if not oneshot:
3133
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3134

    
3135
  node = instance.primary_node
3136

    
3137
  for dev in disks:
3138
    lu.cfg.SetDiskID(dev, node)
3139

    
3140
  # TODO: Convert to utils.Retry
3141

    
3142
  retries = 0
3143
  degr_retries = 10 # in seconds, as we sleep 1 second each time
3144
  while True:
3145
    max_time = 0
3146
    done = True
3147
    cumul_degraded = False
3148
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3149
    msg = rstats.fail_msg
3150
    if msg:
3151
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3152
      retries += 1
3153
      if retries >= 10:
3154
        raise errors.RemoteError("Can't contact node %s for mirror data,"
3155
                                 " aborting." % node)
3156
      time.sleep(6)
3157
      continue
3158
    rstats = rstats.payload
3159
    retries = 0
3160
    for i, mstat in enumerate(rstats):
3161
      if mstat is None:
3162
        lu.LogWarning("Can't compute data for node %s/%s",
3163
                           node, disks[i].iv_name)
3164
        continue
3165

    
3166
      cumul_degraded = (cumul_degraded or
3167
                        (mstat.is_degraded and mstat.sync_percent is None))
3168
      if mstat.sync_percent is not None:
3169
        done = False
3170
        if mstat.estimated_time is not None:
3171
          rem_time = ("%s remaining (estimated)" %
3172
                      utils.FormatSeconds(mstat.estimated_time))
3173
          max_time = mstat.estimated_time
3174
        else:
3175
          rem_time = "no time estimate"
3176
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3177
                        (disks[i].iv_name, mstat.sync_percent, rem_time))
3178

    
3179
    # if we're done but degraded, let's do a few small retries, to
3180
    # make sure we see a stable and not transient situation; therefore
3181
    # we force restart of the loop
3182
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
3183
      logging.info("Degraded disks found, %d retries left", degr_retries)
3184
      degr_retries -= 1
3185
      time.sleep(1)
3186
      continue
3187

    
3188
    if done or oneshot:
3189
      break
3190

    
3191
    time.sleep(min(60, max_time))
3192

    
3193
  if done:
3194
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3195
  return not cumul_degraded
3196

    
3197

    
3198
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3199
  """Check that mirrors are not degraded.
3200

3201
  The ldisk parameter, if True, will change the test from the
3202
  is_degraded attribute (which represents overall non-ok status for
3203
  the device(s)) to the ldisk (representing the local storage status).
3204

3205
  """
3206
  lu.cfg.SetDiskID(dev, node)
3207

    
3208
  result = True
3209

    
3210
  if on_primary or dev.AssembleOnSecondary():
3211
    rstats = lu.rpc.call_blockdev_find(node, dev)
3212
    msg = rstats.fail_msg
3213
    if msg:
3214
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3215
      result = False
3216
    elif not rstats.payload:
3217
      lu.LogWarning("Can't find disk on node %s", node)
3218
      result = False
3219
    else:
3220
      if ldisk:
3221
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3222
      else:
3223
        result = result and not rstats.payload.is_degraded
3224

    
3225
  if dev.children:
3226
    for child in dev.children:
3227
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3228

    
3229
  return result
3230

    
3231

    
3232
class LUOobCommand(NoHooksLU):
3233
  """Logical unit for OOB handling.
3234

3235
  """
3236
  REG_BGL = False
3237
  _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3238

    
3239
  def CheckPrereq(self):
3240
    """Check prerequisites.
3241

3242
    This checks:
3243
     - the node exists in the configuration
3244
     - OOB is supported
3245

3246
    Any errors are signaled by raising errors.OpPrereqError.
3247

3248
    """
3249
    self.nodes = []
3250
    self.master_node = self.cfg.GetMasterNode()
3251

    
3252
    assert self.op.power_delay >= 0.0
3253

    
3254
    if self.op.node_names:
3255
      if self.op.command in self._SKIP_MASTER:
3256
        if self.master_node in self.op.node_names:
3257
          master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3258
          master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3259

    
3260
          if master_oob_handler:
3261
            additional_text = ("Run '%s %s %s' if you want to operate on the"
3262
                               " master regardless") % (master_oob_handler,
3263
                                                        self.op.command,
3264
                                                        self.master_node)
3265
          else:
3266
            additional_text = "The master node does not support out-of-band"
3267

    
3268
          raise errors.OpPrereqError(("Operating on the master node %s is not"
3269
                                      " allowed for %s\n%s") %
3270
                                     (self.master_node, self.op.command,
3271
                                      additional_text), errors.ECODE_INVAL)
3272
    else:
3273
      self.op.node_names = self.cfg.GetNodeList()
3274
      if self.op.command in self._SKIP_MASTER:
3275
        self.op.node_names.remove(self.master_node)
3276

    
3277
    if self.op.command in self._SKIP_MASTER:
3278
      assert self.master_node not in self.op.node_names
3279

    
3280
    for node_name in self.op.node_names:
3281
      node = self.cfg.GetNodeInfo(node_name)
3282

    
3283
      if node is None:
3284
        raise errors.OpPrereqError("Node %s not found" % node_name,
3285
                                   errors.ECODE_NOENT)
3286
      else:
3287
        self.nodes.append(node)
3288

    
3289
      if (not self.op.ignore_status and
3290
          (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3291
        raise errors.OpPrereqError(("Cannot power off node %s because it is"
3292
                                    " not marked offline") % node_name,
3293
                                   errors.ECODE_STATE)
3294

    
3295
  def ExpandNames(self):
3296
    """Gather locks we need.
3297

3298
    """
3299
    if self.op.node_names:
3300
      self.op.node_names = [_ExpandNodeName(self.cfg, name)
3301
                            for name in self.op.node_names]
3302
      lock_names = self.op.node_names
3303
    else:
3304
      lock_names = locking.ALL_SET
3305

    
3306
    self.needed_locks = {
3307
      locking.LEVEL_NODE: lock_names,
3308
      }
3309

    
3310
  def Exec(self, feedback_fn):
3311
    """Execute OOB and return result if we expect any.
3312

3313
    """
3314
    master_node = self.master_node
3315
    ret = []
3316

    
3317
    for idx, node in enumerate(self.nodes):
3318
      node_entry = [(constants.RS_NORMAL, node.name)]
3319
      ret.append(node_entry)
3320

    
3321
      oob_program = _SupportsOob(self.cfg, node)
3322

    
3323
      if not oob_program:
3324
        node_entry.append((constants.RS_UNAVAIL, None))
3325
        continue
3326

    
3327
      logging.info("Executing out-of-band command '%s' using '%s' on %s",
3328
                   self.op.command, oob_program, node.name)
3329
      result = self.rpc.call_run_oob(master_node, oob_program,
3330
                                     self.op.command, node.name,
3331
                                     self.op.timeout)
3332

    
3333
      if result.fail_msg:
3334
        self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3335
                        node.name, result.fail_msg)
3336
        node_entry.append((constants.RS_NODATA, None))
3337
      else:
3338
        try:
3339
          self._CheckPayload(result)
3340
        except errors.OpExecError, err:
3341
          self.LogWarning("The payload returned by '%s' is not valid: %s",
3342
                          node.name, err)
3343
          node_entry.append((constants.RS_NODATA, None))
3344
        else:
3345
          if self.op.command == constants.OOB_HEALTH:
3346
            # For health we should log important events
3347
            for item, status in result.payload:
3348
              if status in [constants.OOB_STATUS_WARNING,
3349
                            constants.OOB_STATUS_CRITICAL]:
3350
                self.LogWarning("On node '%s' item '%s' has status '%s'",
3351
                                node.name, item, status)
3352

    
3353
          if self.op.command == constants.OOB_POWER_ON:
3354
            node.powered = True
3355
          elif self.op.command == constants.OOB_POWER_OFF:
3356
            node.powered = False
3357
          elif self.op.command == constants.OOB_POWER_STATUS:
3358
            powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3359
            if powered != node.powered:
3360
              logging.warning(("Recorded power state (%s) of node '%s' does not"
3361
                               " match actual power state (%s)"), node.powered,
3362
                              node.name, powered)
3363

    
3364
          # For configuration changing commands we should update the node
3365
          if self.op.command in (constants.OOB_POWER_ON,
3366
                                 constants.OOB_POWER_OFF):
3367
            self.cfg.Update(node, feedback_fn)
3368

    
3369
          node_entry.append((constants.RS_NORMAL, result.payload))
3370

    
3371
          if (self.op.command == constants.OOB_POWER_ON and
3372
              idx < len(self.nodes) - 1):
3373
            time.sleep(self.op.power_delay)
3374

    
3375
    return ret
3376

    
3377
  def _CheckPayload(self, result):
3378
    """Checks if the payload is valid.
3379

3380
    @param result: RPC result
3381
    @raises errors.OpExecError: If payload is not valid
3382

3383
    """
3384
    errs = []
3385
    if self.op.command == constants.OOB_HEALTH:
3386
      if not isinstance(result.payload, list):
3387
        errs.append("command 'health' is expected to return a list but got %s" %
3388
                    type(result.payload))
3389
      else:
3390
        for item, status in result.payload:
3391
          if status not in constants.OOB_STATUSES:
3392
            errs.append("health item '%s' has invalid status '%s'" %
3393
                        (item, status))
3394

    
3395
    if self.op.command == constants.OOB_POWER_STATUS:
3396
      if not isinstance(result.payload, dict):
3397
        errs.append("power-status is expected to return a dict but got %s" %
3398
                    type(result.payload))
3399

    
3400
    if self.op.command in [
3401
        constants.OOB_POWER_ON,
3402
        constants.OOB_POWER_OFF,
3403
        constants.OOB_POWER_CYCLE,
3404
        ]:
3405
      if result.payload is not None:
3406
        errs.append("%s is expected to not return payload but got '%s'" %
3407
                    (self.op.command, result.payload))
3408

    
3409
    if errs:
3410
      raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3411
                               utils.CommaJoin(errs))
3412

    
3413
class _OsQuery(_QueryBase):
3414
  FIELDS = query.OS_FIELDS
3415

    
3416
  def ExpandNames(self, lu):
3417
    # Lock all nodes in shared mode
3418
    # Temporary removal of locks, should be reverted later
3419
    # TODO: reintroduce locks when they are lighter-weight
3420
    lu.needed_locks = {}
3421
    #self.share_locks[locking.LEVEL_NODE] = 1
3422
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3423

    
3424
    # The following variables interact with _QueryBase._GetNames
3425
    if self.names:
3426
      self.wanted = self.names
3427
    else:
3428
      self.wanted = locking.ALL_SET
3429

    
3430
    self.do_locking = self.use_locking
3431

    
3432
  def DeclareLocks(self, lu, level):
3433
    pass
3434

    
3435
  @staticmethod
3436
  def _DiagnoseByOS(rlist):
3437
    """Remaps a per-node return list into an a per-os per-node dictionary
3438

3439
    @param rlist: a map with node names as keys and OS objects as values
3440

3441
    @rtype: dict
3442
    @return: a dictionary with osnames as keys and as value another
3443
        map, with nodes as keys and tuples of (path, status, diagnose,
3444
        variants, parameters, api_versions) as values, eg::
3445

3446
          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3447
                                     (/srv/..., False, "invalid api")],
3448
                           "node2": [(/srv/..., True, "", [], [])]}
3449
          }
3450

3451
    """
3452
    all_os = {}
3453
    # we build here the list of nodes that didn't fail the RPC (at RPC
3454
    # level), so that nodes with a non-responding node daemon don't
3455
    # make all OSes invalid
3456
    good_nodes = [node_name for node_name in rlist
3457
                  if not rlist[node_name].fail_msg]
3458
    for node_name, nr in rlist.items():
3459
      if nr.fail_msg or not nr.payload:
3460
        continue
3461
      for (name, path, status, diagnose, variants,
3462
           params, api_versions) in nr.payload:
3463
        if name not in all_os:
3464
          # build a list of nodes for this os containing empty lists
3465
          # for each node in node_list
3466
          all_os[name] = {}
3467
          for nname in good_nodes:
3468
            all_os[name][nname] = []
3469
        # convert params from [name, help] to (name, help)
3470
        params = [tuple(v) for v in params]
3471
        all_os[name][node_name].append((path, status, diagnose,
3472
                                        variants, params, api_versions))
3473
    return all_os
3474

    
3475
  def _GetQueryData(self, lu):
3476
    """Computes the list of nodes and their attributes.
3477

3478
    """
3479
    # Locking is not used
3480
    assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3481

    
3482
    valid_nodes = [node.name
3483
                   for node in lu.cfg.GetAllNodesInfo().values()
3484
                   if not node.offline and node.vm_capable]
3485
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3486
    cluster = lu.cfg.GetClusterInfo()
3487

    
3488
    data = {}
3489

    
3490
    for (os_name, os_data) in pol.items():
3491
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3492
                          hidden=(os_name in cluster.hidden_os),
3493
                          blacklisted=(os_name in cluster.blacklisted_os))
3494

    
3495
      variants = set()
3496
      parameters = set()
3497
      api_versions = set()
3498

    
3499
      for idx, osl in enumerate(os_data.values()):
3500
        info.valid = bool(info.valid and osl and osl[0][1])
3501
        if not info.valid:
3502
          break
3503

    
3504
        (node_variants, node_params, node_api) = osl[0][3:6]
3505
        if idx == 0:
3506
          # First entry
3507
          variants.update(node_variants)
3508
          parameters.update(node_params)
3509
          api_versions.update(node_api)
3510
        else:
3511
          # Filter out inconsistent values
3512
          variants.intersection_update(node_variants)
3513
          parameters.intersection_update(node_params)
3514
          api_versions.intersection_update(node_api)
3515

    
3516
      info.variants = list(variants)
3517
      info.parameters = list(parameters)
3518
      info.api_versions = list(api_versions)
3519

    
3520
      data[os_name] = info
3521

    
3522
    # Prepare data in requested order
3523
    return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3524
            if name in data]
3525

    
3526

    
3527
class LUOsDiagnose(NoHooksLU):
3528
  """Logical unit for OS diagnose/query.
3529

3530
  """
3531
  REQ_BGL = False
3532

    
3533
  @staticmethod
3534
  def _BuildFilter(fields, names):
3535
    """Builds a filter for querying OSes.
3536

3537
    """
3538
    name_filter = qlang.MakeSimpleFilter("name", names)
3539

    
3540
    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3541
    # respective field is not requested
3542
    status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3543
                     for fname in ["hidden", "blacklisted"]
3544
                     if fname not in fields]
3545
    if "valid" not in fields:
3546
      status_filter.append([qlang.OP_TRUE, "valid"])
3547

    
3548
    if status_filter:
3549
      status_filter.insert(0, qlang.OP_AND)
3550
    else:
3551
      status_filter = None
3552

    
3553
    if name_filter and status_filter:
3554
      return [qlang.OP_AND, name_filter, status_filter]
3555
    elif name_filter:
3556
      return name_filter
3557
    else:
3558
      return status_filter
3559

    
3560
  def CheckArguments(self):
3561
    self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3562
                       self.op.output_fields, False)
3563

    
3564
  def ExpandNames(self):
3565
    self.oq.ExpandNames(self)
3566

    
3567
  def Exec(self, feedback_fn):
3568
    return self.oq.OldStyleQuery(self)
3569

    
3570

    
3571
class LUNodeRemove(LogicalUnit):
3572
  """Logical unit for removing a node.
3573

3574
  """
3575
  HPATH = "node-remove"
3576
  HTYPE = constants.HTYPE_NODE
3577

    
3578
  def BuildHooksEnv(self):
3579
    """Build hooks env.
3580

3581
    This doesn't run on the target node in the pre phase as a failed
3582
    node would then be impossible to remove.
3583

3584
    """
3585
    env = {
3586
      "OP_TARGET": self.op.node_name,
3587
      "NODE_NAME": self.op.node_name,
3588
      }
3589
    all_nodes = self.cfg.GetNodeList()
3590
    try:
3591
      all_nodes.remove(self.op.node_name)
3592
    except ValueError:
3593
      logging.warning("Node %s which is about to be removed not found"
3594
                      " in the all nodes list", self.op.node_name)
3595
    return env, all_nodes, all_nodes
3596

    
3597
  def CheckPrereq(self):
3598
    """Check prerequisites.
3599

3600
    This checks:
3601
     - the node exists in the configuration
3602
     - it does not have primary or secondary instances
3603
     - it's not the master
3604

3605
    Any errors are signaled by raising errors.OpPrereqError.
3606

3607
    """
3608
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3609
    node = self.cfg.GetNodeInfo(self.op.node_name)
3610
    assert node is not None
3611

    
3612
    instance_list = self.cfg.GetInstanceList()
3613

    
3614
    masternode = self.cfg.GetMasterNode()
3615
    if node.name == masternode:
3616
      raise errors.OpPrereqError("Node is the master node,"
3617
                                 " you need to failover first.",
3618
                                 errors.ECODE_INVAL)
3619

    
3620
    for instance_name in instance_list:
3621
      instance = self.cfg.GetInstanceInfo(instance_name)
3622
      if node.name in instance.all_nodes:
3623
        raise errors.OpPrereqError("Instance %s is still running on the node,"
3624
                                   " please remove first." % instance_name,
3625
                                   errors.ECODE_INVAL)
3626
    self.op.node_name = node.name
3627
    self.node = node
3628

    
3629
  def Exec(self, feedback_fn):
3630
    """Removes the node from the cluster.
3631

3632
    """
3633
    node = self.node
3634
    logging.info("Stopping the node daemon and removing configs from node %s",
3635
                 node.name)
3636

    
3637
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3638

    
3639
    # Promote nodes to master candidate as needed
3640
    _AdjustCandidatePool(self, exceptions=[node.name])
3641
    self.context.RemoveNode(node.name)
3642

    
3643
    # Run post hooks on the node before it's removed
3644
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3645
    try:
3646
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3647
    except:
3648
      # pylint: disable-msg=W0702
3649
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
3650

    
3651
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3652
    msg = result.fail_msg
3653
    if msg:
3654
      self.LogWarning("Errors encountered on the remote node while leaving"
3655
                      " the cluster: %s", msg)
3656

    
3657
    # Remove node from our /etc/hosts
3658
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3659
      master_node = self.cfg.GetMasterNode()
3660
      result = self.rpc.call_etc_hosts_modify(master_node,
3661
                                              constants.ETC_HOSTS_REMOVE,
3662
                                              node.name, None)
3663
      result.Raise("Can't update hosts file with new host data")
3664
      _RedistributeAncillaryFiles(self)
3665

    
3666

    
3667
class _NodeQuery(_QueryBase):
3668
  FIELDS = query.NODE_FIELDS
3669

    
3670
  def ExpandNames(self, lu):
3671
    lu.needed_locks = {}
3672
    lu.share_locks[locking.LEVEL_NODE] = 1
3673

    
3674
    if self.names:
3675
      self.wanted = _GetWantedNodes(lu, self.names)
3676
    else:
3677
      self.wanted = locking.ALL_SET
3678

    
3679
    self.do_locking = (self.use_locking and
3680
                       query.NQ_LIVE in self.requested_data)
3681

    
3682
    if self.do_locking:
3683
      # if we don't request only static fields, we need to lock the nodes
3684
      lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3685

    
3686
  def DeclareLocks(self, lu, level):
3687
    pass
3688

    
3689
  def _GetQueryData(self, lu):
3690
    """Computes the list of nodes and their attributes.
3691

3692
    """
3693
    all_info = lu.cfg.GetAllNodesInfo()
3694

    
3695
    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3696

    
3697
    # Gather data as requested
3698
    if query.NQ_LIVE in self.requested_data:
3699
      # filter out non-vm_capable nodes
3700
      toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3701

    
3702
      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3703
                                        lu.cfg.GetHypervisorType())
3704
      live_data = dict((name, nresult.payload)
3705
                       for (name, nresult) in node_data.items()
3706
                       if not nresult.fail_msg and nresult.payload)
3707
    else:
3708
      live_data = None
3709

    
3710
    if query.NQ_INST in self.requested_data:
3711
      node_to_primary = dict([(name, set()) for name in nodenames])
3712
      node_to_secondary = dict([(name, set()) for name in nodenames])
3713

    
3714
      inst_data = lu.cfg.GetAllInstancesInfo()
3715

    
3716
      for inst in inst_data.values():
3717
        if inst.primary_node in node_to_primary:
3718
          node_to_primary[inst.primary_node].add(inst.name)
3719
        for secnode in inst.secondary_nodes:
3720
          if secnode in node_to_secondary:
3721
            node_to_secondary[secnode].add(inst.name)
3722
    else:
3723
      node_to_primary = None
3724
      node_to_secondary = None
3725

    
3726
    if query.NQ_OOB in self.requested_data:
3727
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3728
                         for name, node in all_info.iteritems())
3729
    else:
3730
      oob_support = None
3731

    
3732
    if query.NQ_GROUP in self.requested_data:
3733
      groups = lu.cfg.GetAllNodeGroupsInfo()
3734
    else:
3735
      groups = {}
3736

    
3737
    return query.NodeQueryData([all_info[name] for name in nodenames],
3738
                               live_data, lu.cfg.GetMasterNode(),
3739
                               node_to_primary, node_to_secondary, groups,
3740
                               oob_support, lu.cfg.GetClusterInfo())
3741

    
3742

    
3743
class LUNodeQuery(NoHooksLU):
3744
  """Logical unit for querying nodes.
3745

3746
  """
3747
  # pylint: disable-msg=W0142
3748
  REQ_BGL = False
3749

    
3750
  def CheckArguments(self):
3751
    self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3752
                         self.op.output_fields, self.op.use_locking)
3753

    
3754
  def ExpandNames(self):
3755
    self.nq.ExpandNames(self)
3756

    
3757
  def Exec(self, feedback_fn):
3758
    return self.nq.OldStyleQuery(self)
3759

    
3760

    
3761
class LUNodeQueryvols(NoHooksLU):
3762
  """Logical unit for getting volumes on node(s).
3763

3764
  """
3765
  REQ_BGL = False
3766
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3767
  _FIELDS_STATIC = utils.FieldSet("node")
3768

    
3769
  def CheckArguments(self):
3770
    _CheckOutputFields(static=self._FIELDS_STATIC,
3771
                       dynamic=self._FIELDS_DYNAMIC,
3772
                       selected=self.op.output_fields)
3773

    
3774
  def ExpandNames(self):
3775
    self.needed_locks = {}
3776
    self.share_locks[locking.LEVEL_NODE] = 1
3777
    if not self.op.nodes:
3778
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3779
    else:
3780
      self.needed_locks[locking.LEVEL_NODE] = \
3781
        _GetWantedNodes(self, self.op.nodes)
3782

    
3783
  def Exec(self, feedback_fn):
3784
    """Computes the list of nodes and their attributes.
3785

3786
    """
3787
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
3788
    volumes = self.rpc.call_node_volumes(nodenames)
3789

    
3790
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
3791
             in self.cfg.GetInstanceList()]
3792

    
3793
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3794

    
3795
    output = []
3796
    for node in nodenames:
3797
      nresult = volumes[node]
3798
      if nresult.offline:
3799
        continue
3800
      msg = nresult.fail_msg
3801
      if msg:
3802
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3803
        continue
3804

    
3805
      node_vols = nresult.payload[:]
3806
      node_vols.sort(key=lambda vol: vol['dev'])
3807

    
3808
      for vol in node_vols:
3809
        node_output = []
3810
        for field in self.op.output_fields:
3811
          if field == "node":
3812
            val = node
3813
          elif field == "phys":
3814
            val = vol['dev']
3815
          elif field == "vg":
3816
            val = vol['vg']
3817
          elif field == "name":
3818
            val = vol['name']
3819
          elif field == "size":
3820
            val = int(float(vol['size']))
3821
          elif field == "instance":
3822
            for inst in ilist:
3823
              if node not in lv_by_node[inst]:
3824
                continue
3825
              if vol['name'] in lv_by_node[inst][node]:
3826
                val = inst.name
3827
                break
3828
            else:
3829
              val = '-'
3830
          else:
3831
            raise errors.ParameterError(field)
3832
          node_output.append(str(val))
3833

    
3834
        output.append(node_output)
3835

    
3836
    return output
3837

    
3838

    
3839
class LUNodeQueryStorage(NoHooksLU):
3840
  """Logical unit for getting information on storage units on node(s).
3841

3842
  """
3843
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3844
  REQ_BGL = False
3845

    
3846
  def CheckArguments(self):
3847
    _CheckOutputFields(static=self._FIELDS_STATIC,
3848
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3849
                       selected=self.op.output_fields)
3850

    
3851
  def ExpandNames(self):
3852
    self.needed_locks = {}
3853
    self.share_locks[locking.LEVEL_NODE] = 1
3854

    
3855
    if self.op.nodes:
3856
      self.needed_locks[locking.LEVEL_NODE] = \
3857
        _GetWantedNodes(self, self.op.nodes)
3858
    else:
3859
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3860

    
3861
  def Exec(self, feedback_fn):
3862
    """Computes the list of nodes and their attributes.
3863

3864
    """
3865
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3866

    
3867
    # Always get name to sort by
3868
    if constants.SF_NAME in self.op.output_fields:
3869
      fields = self.op.output_fields[:]
3870
    else:
3871
      fields = [constants.SF_NAME] + self.op.output_fields
3872

    
3873
    # Never ask for node or type as it's only known to the LU
3874
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
3875
      while extra in fields:
3876
        fields.remove(extra)
3877

    
3878
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3879
    name_idx = field_idx[constants.SF_NAME]
3880

    
3881
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3882
    data = self.rpc.call_storage_list(self.nodes,
3883
                                      self.op.storage_type, st_args,
3884
                                      self.op.name, fields)
3885

    
3886
    result = []
3887

    
3888
    for node in utils.NiceSort(self.nodes):
3889
      nresult = data[node]
3890
      if nresult.offline:
3891
        continue
3892

    
3893
      msg = nresult.fail_msg
3894
      if msg:
3895
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3896
        continue
3897

    
3898
      rows = dict([(row[name_idx], row) for row in nresult.payload])
3899

    
3900
      for name in utils.NiceSort(rows.keys()):
3901
        row = rows[name]
3902

    
3903
        out = []
3904

    
3905
        for field in self.op.output_fields:
3906
          if field == constants.SF_NODE:
3907
            val = node
3908
          elif field == constants.SF_TYPE:
3909
            val = self.op.storage_type
3910
          elif field in field_idx:
3911
            val = row[field_idx[field]]
3912
          else:
3913
            raise errors.ParameterError(field)
3914

    
3915
          out.append(val)
3916

    
3917
        result.append(out)
3918

    
3919
    return result
3920

    
3921

    
3922
class _InstanceQuery(_QueryBase):
3923
  FIELDS = query.INSTANCE_FIELDS
3924

    
3925
  def ExpandNames(self, lu):
3926
    lu.needed_locks = {}
3927
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
3928
    lu.share_locks[locking.LEVEL_NODE] = 1
3929

    
3930
    if self.names:
3931
      self.wanted = _GetWantedInstances(lu, self.names)
3932
    else:
3933
      self.wanted = locking.ALL_SET
3934

    
3935
    self.do_locking = (self.use_locking and
3936
                       query.IQ_LIVE in self.requested_data)
3937
    if self.do_locking:
3938
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3939
      lu.needed_locks[locking.LEVEL_NODE] = []
3940
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3941

    
3942
  def DeclareLocks(self, lu, level):
3943
    if level == locking.LEVEL_NODE and self.do_locking:
3944
      lu._LockInstancesNodes() # pylint: disable-msg=W0212
3945

    
3946
  def _GetQueryData(self, lu):
3947
    """Computes the list of instances and their attributes.
3948

3949
    """
3950
    cluster = lu.cfg.GetClusterInfo()
3951
    all_info = lu.cfg.GetAllInstancesInfo()
3952

    
3953
    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
3954

    
3955
    instance_list = [all_info[name] for name in instance_names]
3956
    nodes = frozenset(itertools.chain(*(inst.all_nodes
3957
                                        for inst in instance_list)))
3958
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3959
    bad_nodes = []
3960
    offline_nodes = []
3961
    wrongnode_inst = set()
3962

    
3963
    # Gather data as requested
3964
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
3965
      live_data = {}
3966
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
3967
      for name in nodes:
3968
        result = node_data[name]
3969
        if result.offline:
3970
          # offline nodes will be in both lists
3971
          assert result.fail_msg
3972
          offline_nodes.append(name)
3973
        if result.fail_msg:
3974
          bad_nodes.append(name)
3975
        elif result.payload:
3976
          for inst in result.payload:
3977
            if all_info[inst].primary_node == name:
3978
              live_data.update(result.payload)
3979
            else:
3980
              wrongnode_inst.add(inst)
3981
        # else no instance is alive
3982
    else:
3983
      live_data = {}
3984

    
3985
    if query.IQ_DISKUSAGE in self.requested_data:
3986
      disk_usage = dict((inst.name,
3987
                         _ComputeDiskSize(inst.disk_template,
3988
                                          [{"size": disk.size}
3989
                                           for disk in inst.disks]))
3990
                        for inst in instance_list)
3991
    else:
3992
      disk_usage = None
3993

    
3994
    if query.IQ_CONSOLE in self.requested_data:
3995
      consinfo = {}
3996
      for inst in instance_list:
3997
        if inst.name in live_data:
3998
          # Instance is running
3999
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4000
        else:
4001
          consinfo[inst.name] = None
4002
      assert set(consinfo.keys()) == set(instance_names)
4003
    else:
4004
      consinfo = None
4005

    
4006
    return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4007
                                   disk_usage, offline_nodes, bad_nodes,
4008
                                   live_data, wrongnode_inst, consinfo)
4009

    
4010

    
4011
class LUQuery(NoHooksLU):
4012
  """Query for resources/items of a certain kind.
4013

4014
  """
4015
  # pylint: disable-msg=W0142
4016
  REQ_BGL = False
4017

    
4018
  def CheckArguments(self):
4019
    qcls = _GetQueryImplementation(self.op.what)
4020

    
4021
    self.impl = qcls(self.op.filter, self.op.fields, False)
4022

    
4023
  def ExpandNames(self):
4024
    self.impl.ExpandNames(self)
4025

    
4026
  def DeclareLocks(self, level):
4027
    self.impl.DeclareLocks(self, level)
4028

    
4029
  def Exec(self, feedback_fn):
4030
    return self.impl.NewStyleQuery(self)
4031

    
4032

    
4033
class LUQueryFields(NoHooksLU):
4034
  """Query for resources/items of a certain kind.
4035

4036
  """
4037
  # pylint: disable-msg=W0142
4038
  REQ_BGL = False
4039

    
4040
  def CheckArguments(self):
4041
    self.qcls = _GetQueryImplementation(self.op.what)
4042

    
4043
  def ExpandNames(self):
4044
    self.needed_locks = {}
4045

    
4046
  def Exec(self, feedback_fn):
4047
    return self.qcls.FieldsQuery(self.op.fields)
4048

    
4049

    
4050
class LUNodeModifyStorage(NoHooksLU):
4051
  """Logical unit for modifying a storage volume on a node.
4052

4053
  """
4054
  REQ_BGL = False
4055

    
4056
  def CheckArguments(self):
4057
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4058

    
4059
    storage_type = self.op.storage_type
4060

    
4061
    try:
4062
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4063
    except KeyError:
4064
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
4065
                                 " modified" % storage_type,
4066
                                 errors.ECODE_INVAL)
4067

    
4068
    diff = set(self.op.changes.keys()) - modifiable
4069
    if diff:
4070
      raise errors.OpPrereqError("The following fields can not be modified for"
4071
                                 " storage units of type '%s': %r" %
4072
                                 (storage_type, list(diff)),
4073
                                 errors.ECODE_INVAL)
4074

    
4075
  def ExpandNames(self):
4076
    self.needed_locks = {
4077
      locking.LEVEL_NODE: self.op.node_name,
4078
      }
4079

    
4080
  def Exec(self, feedback_fn):
4081
    """Computes the list of nodes and their attributes.
4082

4083
    """
4084
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4085
    result = self.rpc.call_storage_modify(self.op.node_name,
4086
                                          self.op.storage_type, st_args,
4087
                                          self.op.name, self.op.changes)
4088
    result.Raise("Failed to modify storage unit '%s' on %s" %
4089
                 (self.op.name, self.op.node_name))
4090

    
4091

    
4092
class LUNodeAdd(LogicalUnit):
4093
  """Logical unit for adding node to the cluster.
4094

4095
  """
4096
  HPATH = "node-add"
4097
  HTYPE = constants.HTYPE_NODE
4098
  _NFLAGS = ["master_capable", "vm_capable"]
4099

    
4100
  def CheckArguments(self):
4101
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4102
    # validate/normalize the node name
4103
    self.hostname = netutils.GetHostname(name=self.op.node_name,
4104
                                         family=self.primary_ip_family)
4105
    self.op.node_name = self.hostname.name
4106
    if self.op.readd and self.op.group:
4107
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
4108
                                 " being readded", errors.ECODE_INVAL)
4109

    
4110
  def BuildHooksEnv(self):
4111
    """Build hooks env.
4112

4113
    This will run on all nodes before, and on all nodes + the new node after.
4114

4115
    """
4116
    env = {
4117
      "OP_TARGET": self.op.node_name,
4118
      "NODE_NAME": self.op.node_name,
4119
      "NODE_PIP": self.op.primary_ip,
4120
      "NODE_SIP": self.op.secondary_ip,
4121
      "MASTER_CAPABLE": str(self.op.master_capable),
4122
      "VM_CAPABLE": str(self.op.vm_capable),
4123
      }
4124
    nodes_0 = self.cfg.GetNodeList()
4125
    nodes_1 = nodes_0 + [self.op.node_name, ]
4126
    return env, nodes_0, nodes_1
4127

    
4128
  def CheckPrereq(self):
4129
    """Check prerequisites.
4130

4131
    This checks:
4132
     - the new node is not already in the config
4133
     - it is resolvable
4134
     - its parameters (single/dual homed) matches the cluster
4135

4136
    Any errors are signaled by raising errors.OpPrereqError.
4137

4138
    """
4139
    cfg = self.cfg
4140
    hostname = self.hostname
4141
    node = hostname.name
4142
    primary_ip = self.op.primary_ip = hostname.ip
4143
    if self.op.secondary_ip is None:
4144
      if self.primary_ip_family == netutils.IP6Address.family:
4145
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4146
                                   " IPv4 address must be given as secondary",
4147
                                   errors.ECODE_INVAL)
4148
      self.op.secondary_ip = primary_ip
4149

    
4150
    secondary_ip = self.op.secondary_ip
4151
    if not netutils.IP4Address.IsValid(secondary_ip):
4152
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4153
                                 " address" % secondary_ip, errors.ECODE_INVAL)
4154

    
4155
    node_list = cfg.GetNodeList()
4156
    if not self.op.readd and node in node_list:
4157
      raise errors.OpPrereqError("Node %s is already in the configuration" %
4158
                                 node, errors.ECODE_EXISTS)
4159
    elif self.op.readd and node not in node_list:
4160
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4161
                                 errors.ECODE_NOENT)
4162

    
4163
    self.changed_primary_ip = False
4164

    
4165
    for existing_node_name in node_list:
4166
      existing_node = cfg.GetNodeInfo(existing_node_name)
4167

    
4168
      if self.op.readd and node == existing_node_name:
4169
        if existing_node.secondary_ip != secondary_ip:
4170
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
4171
                                     " address configuration as before",
4172
                                     errors.ECODE_INVAL)
4173
        if existing_node.primary_ip != primary_ip:
4174
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())


class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          i_mirrored = instance.disk_template in constants.DTS_INT_MIRROR
          if i_mirrored and self.op.node_name in instance.all_nodes:
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)
          else:
            instances_release.append(instance_name)
        if instances_release:
          self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
          self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the requested modifications against the node's current state.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Please power on node %s first before you"
                                    " can reset offline state") %
                                   self.op.node_name)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " which does not support out-of-band"
                                  " handling") % self.op.node_name)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % self.affected_instances)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result


class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    os_hvp = {}

    # Filter just for enabled hypervisors
    for os_name, hv_dict in cluster.os_hvp.items():
      os_hvp[os_name] = {}
      for hv_name, hv_params in hv_dict.items():
        if hv_name in cluster.enabled_hypervisors:
          os_hvp[os_name][hv_name] = hv_params

    # Convert ip_family to ip_version
    primary_ip_version = constants.IP4_VERSION
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION

    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }

    return result


class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      elif field == "volume_group_name":
        entry = self.cfg.GetVGName()
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values


class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    """
    disks_ok, disks_info = \
              _AssembleInstanceDisks(self, self.instance,
                                     ignore_size=self.op.ignore_size)
    if not disks_ok:
      raise errors.OpExecError("Cannot activate block devices")

    return disks_info


def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors on secondary nodes
      won't result in an error return from the function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @return: False if the operation failed, otherwise a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
                                           ignore_secondaries=force)
  if not disks_ok:
    _ShutdownInstanceDisks(lu, instance)
    if force is not None and not force:
      lu.proc.LogWarning("", hint="If the message above refers to a"
                         " secondary node,"
                         " you can retry the operation using '--force'.")
    raise errors.OpExecError("Disk consistency error")


class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    """
    instance = self.instance
    if self.op.force:
      _ShutdownInstanceDisks(self, instance)
    else:
      _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance.

  This function checks if an instance is running, before calling
  _ShutdownInstanceDisks.

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks=disks)


def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list.

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on

  """
  if disks is None:
    return instance.disks
  else:
    if not set(disks).issubset(instance.disks):
      raise errors.ProgrammerError("Can only act on disks belonging to the"
                                   " target instance")
    return disks


def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are
  ignored.

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if ((node == instance.primary_node and not ignore_primary) or
            (node != instance.primary_node and not result.offline)):
          all_result = False
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
  nodeinfo[node].Raise("Can't get data from node %s" % node,
                       prereq=True, ecode=errors.ECODE_ENVIRON)
  free_mem = nodeinfo[node].payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)
  if requested > free_mem:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)


def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg, req_size in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)


def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  This function checks if all given nodes have the needed amount of
  free disk. In case any node has less disk or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
  for node in nodenames:
    info = nodeinfo[node]
    info.Raise("Cannot get current information from node %s" % node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    vg_free = info.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, vg_free), errors.ECODE_ENVIRON)
    if requested > vg_free:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, vg_free),
                                 errors.ECODE_NORES)


class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)


class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.op.timeout
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    self.primary_offline = \
      self.cfg.GetNodeInfo(self.instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
    else:
      _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
      msg = result.fail_msg
      if msg:
        self.proc.LogWarning("Could not shutdown instance: %s" % msg)

      _ShutdownInstanceDisks(self, instance)


class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    """
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip check without a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    new_name = self.op.new_name
    if self.op.name_check:
      hostname = netutils.GetHostname(name=new_name)
      self.LogInfo("Resolved given name '%s' to '%s'", new_name,
                   hostname.name)
      if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                    (hostname.name, self.op.new_name),
                                    errors.ECODE_INVAL)
      new_name = self.op.new_name = hostname.name
      if (self.op.ip_check and
          netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (hostname.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list and new_name != instance.name:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    rename_file_storage = False
    if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
        self.op.new_name != inst.name):
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      rename_file_storage = True

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name


class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)


def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  """
  logging.info("Removing block devices for instance %s", instance.name)

  if not _RemoveDisks(lu, instance):
    if not ignore_failures:
      raise errors.OpExecError("Can't remove instance's disks")
    feedback_fn("Warning: can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUInstanceQuery(NoHooksLU):
5705
  """Logical unit for querying instances.
5706

5707
  """
5708
  # pylint: disable-msg=W0142
5709
  REQ_BGL = False
5710

    
5711
  def CheckArguments(self):
5712
    self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5713
                             self.op.output_fields, self.op.use_locking)
5714

    
5715
  def ExpandNames(self):
5716
    self.iq.ExpandNames(self)
5717

    
5718
  def DeclareLocks(self, level):
5719
    self.iq.DeclareLocks(self, level)
5720

    
5721
  def Exec(self, feedback_fn):
5722
    return self.iq.OldStyleQuery(self)
5723

    
5724

    
5725
class LUInstanceFailover(LogicalUnit):
5726
  """Failover an instance.
5727

5728
  """
5729
  HPATH = "instance-failover"
5730
  HTYPE = constants.HTYPE_INSTANCE
5731
  REQ_BGL = False
5732

    
5733
  def CheckArguments(self):
5734
    """Check the arguments.
5735

5736
    """
5737
    self.iallocator = getattr(self.op, "iallocator", None)
5738
    self.target_node = getattr(self.op, "target_node", None)
5739

    
5740
  def ExpandNames(self):
5741
    self._ExpandAndLockInstance()
5742

    
5743
    if self.op.target_node is not None:
5744
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5745

    
5746
    self.needed_locks[locking.LEVEL_NODE] = []
5747
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5748

    
5749
  def DeclareLocks(self, level):
5750
    if level == locking.LEVEL_NODE:
5751
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5752
      if instance.disk_template in constants.DTS_EXT_MIRROR:
5753
        if self.op.target_node is None:
5754
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5755
        else:
5756
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5757
                                                   self.op.target_node]
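        # the node locks are listed explicitly above, so drop the
        # recalculation request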
5758
        del self.recalculate_locks[locking.LEVEL_NODE]
5759
      else:
5760
        self._LockInstancesNodes()
5761

    
5762
  def BuildHooksEnv(self):
5763
    """Build hooks env.
5764

5765
    This runs on master, primary and secondary nodes of the instance.
5766

5767
    """
5768
    instance = self.instance
5769
    source_node = instance.primary_node
5770
    env = {
5771
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5772
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5773
      "OLD_PRIMARY": source_node,
5774
      "NEW_PRIMARY": self.op.target_node,
5775
      }
5776

    
5777
    if instance.disk_template in constants.DTS_INT_MIRROR:
5778
      env["OLD_SECONDARY"] = instance.secondary_nodes[0]
5779
      env["NEW_SECONDARY"] = source_node
5780
    else:
5781
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
5782

    
5783
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5784
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5785
    nl_post = list(nl)
5786
    nl_post.append(source_node)
5787
    return env, nl, nl_post
5788

    
5789
  def CheckPrereq(self):
5790
    """Check prerequisites.
5791

5792
    This checks that the instance is in the cluster.
5793

5794
    """
5795
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5796
    assert self.instance is not None, \
5797
      "Cannot retrieve locked instance %s" % self.op.instance_name
5798

    
5799
    bep = self.cfg.GetClusterInfo().FillBE(instance)
5800
    if instance.disk_template not in constants.DTS_MIRRORED:
5801
      raise errors.OpPrereqError("Instance's disk layout is not"
5802
                                 " mirrored, cannot failover.",
5803
                                 errors.ECODE_STATE)
5804

    
5805
    if instance.disk_template in constants.DTS_EXT_MIRROR:
5806
      _CheckIAllocatorOrNode(self, "iallocator", "target_node")
5807
      if self.op.iallocator:
5808
        self._RunAllocator()
5809
        # Release all unnecessary node locks
5810
        nodes_keep = [instance.primary_node, self.op.target_node]
5811
        nodes_rel = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5812
                     if node not in nodes_keep]
5813
        self.context.glm.release(locking.LEVEL_NODE, nodes_rel)
5814
        self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5815

    
5816
      # self.op.target_node is already populated, either directly or by the
5817
      # iallocator run
5818
      target_node = self.op.target_node
5819

    
5820
    else:
5821
      secondary_nodes = instance.secondary_nodes
5822
      if not secondary_nodes:
5823
        raise errors.ConfigurationError("No secondary node but using"
5824
                                        " %s disk template" %
5825
                                        instance.disk_template)
5826
      target_node = secondary_nodes[0]
5827

    
5828
      if self.op.iallocator or (self.op.target_node and
5829
                                self.op.target_node != target_node):
5830
        raise errors.OpPrereqError("Instances with disk template %s cannot"
5831
                                   " be failed over to arbitrary nodes"
5832
                                   " (neither an iallocator nor a target"
5833
                                   " node can be passed)" %
5834
                                   instance.disk_template, errors.ECODE_INVAL)
5835
    _CheckNodeOnline(self, target_node)
5836
    _CheckNodeNotDrained(self, target_node)
5837

    
5838
    # Save target_node so that we can use it in BuildHooksEnv
5839
    self.op.target_node = target_node
5840

    
5841
    if instance.admin_up:
5842
      # check memory requirements on the secondary node
5843
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5844
                           instance.name, bep[constants.BE_MEMORY],
5845
                           instance.hypervisor)
5846
    else:
5847
      self.LogInfo("Not checking memory on the secondary node as"
5848
                   " instance will not be started")
5849

    
5850
    # check bridge existence
5851
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5852

    
5853
  def Exec(self, feedback_fn):
5854
    """Failover an instance.
5855

5856
    The failover is done by shutting it down on its present node and
5857
    starting it on the secondary.
5858

5859
    """
5860
    instance = self.instance
5861
    primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5862

    
5863
    source_node = instance.primary_node
5864
    target_node = self.op.target_node
5865

    
5866
    if instance.admin_up:
5867
      feedback_fn("* checking disk consistency between source and target")
5868
      for dev in instance.disks:
5869
        # for drbd, these are drbd over lvm
5870
        if not _CheckDiskConsistency(self, dev, target_node, False):
5871
          if not self.op.ignore_consistency:
5872
            raise errors.OpExecError("Disk %s is degraded on target node,"
5873
                                     " aborting failover." % dev.iv_name)
5874
    else:
5875
      feedback_fn("* not checking disk consistency as instance is not running")
5876

    
5877
    feedback_fn("* shutting down instance on source node")
5878
    logging.info("Shutting down instance %s on node %s",
5879
                 instance.name, source_node)
5880

    
5881
    result = self.rpc.call_instance_shutdown(source_node, instance,
5882
                                             self.op.shutdown_timeout)
5883
    msg = result.fail_msg
5884
    if msg:
5885
      if self.op.ignore_consistency or primary_node.offline:
5886
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
5887
                             " Proceeding anyway. Please make sure node"
5888
                             " %s is down. Error details: %s",
5889
                             instance.name, source_node, source_node, msg)
5890
      else:
5891
        raise errors.OpExecError("Could not shutdown instance %s on"
5892
                                 " node %s: %s" %
5893
                                 (instance.name, source_node, msg))
5894

    
5895
    feedback_fn("* deactivating the instance's disks on source node")
5896
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5897
      raise errors.OpExecError("Can't shut down the instance's disks.")
5898

    
5899
    instance.primary_node = target_node
5900
    # distribute new instance config to the other nodes
5901
    self.cfg.Update(instance, feedback_fn)
5902

    
5903
    # Only start the instance if it's marked as up
5904
    if instance.admin_up:
5905
      feedback_fn("* activating the instance's disks on target node")
5906
      logging.info("Starting instance %s on node %s",
5907
                   instance.name, target_node)
5908

    
5909
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
5910
                                           ignore_secondaries=True)
5911
      if not disks_ok:
5912
        _ShutdownInstanceDisks(self, instance)
5913
        raise errors.OpExecError("Can't activate the instance's disks")
5914

    
5915
      feedback_fn("* starting the instance on the target node")
5916
      result = self.rpc.call_instance_start(target_node, instance, None, None)
5917
      msg = result.fail_msg
5918
      if msg:
5919
        _ShutdownInstanceDisks(self, instance)
5920
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5921
                                 (instance.name, target_node, msg))
5922

    
5923
  def _RunAllocator(self):
5924
    """Run the allocator based on input opcode.
5925

5926
    """
5927
    ial = IAllocator(self.cfg, self.rpc,
5928
                     mode=constants.IALLOCATOR_MODE_RELOC,
5929
                     name=self.instance.name,
5930
                     # TODO See why hail breaks with a single node below
5931
                     relocate_from=[self.instance.primary_node,
5932
                                    self.instance.primary_node],
5933
                     )
5934

    
5935
    ial.Run(self.op.iallocator)
5936

    
5937
    if not ial.success:
5938
      raise errors.OpPrereqError("Can't compute nodes using"
5939
                                 " iallocator '%s': %s" %
5940
                                 (self.op.iallocator, ial.info),
5941
                                 errors.ECODE_NORES)
5942
    if len(ial.result) != ial.required_nodes:
5943
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5944
                                 " of nodes (%s), required %s" %
5945
                                 (self.op.iallocator, len(ial.result),
5946
                                  ial.required_nodes), errors.ECODE_FAULT)
5947
    self.op.target_node = ial.result[0]
5948
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5949
                 self.instance.name, self.op.iallocator,
5950
                 utils.CommaJoin(ial.result))
5951

    
5952

    
5953
class LUInstanceMigrate(LogicalUnit):
5954
  """Migrate an instance.
5955

5956
  This is migration without shutting down, compared to the failover,
5957
  which is done with shutdown.
5958

5959
  """
5960
  HPATH = "instance-migrate"
5961
  HTYPE = constants.HTYPE_INSTANCE
5962
  REQ_BGL = False
5963

    
5964
  def ExpandNames(self):
5965
    self._ExpandAndLockInstance()
5966

    
5967
    if self.op.target_node is not None:
5968
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5969

    
5970
    self.needed_locks[locking.LEVEL_NODE] = []
5971
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5972

    
5973
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
5974
                                       self.op.cleanup, self.op.iallocator,
5975
                                       self.op.target_node)
5976
    self.tasklets = [self._migrater]
5977

    
5978
  def DeclareLocks(self, level):
5979
    if level == locking.LEVEL_NODE:
5980
      instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
5981
      if instance.disk_template in constants.DTS_EXT_MIRROR:
5982
        if self.op.target_node is None:
5983
          self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5984
        else:
5985
          self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
5986
                                                   self.op.target_node]
5987
        del self.recalculate_locks[locking.LEVEL_NODE]
5988
      else:
5989
        self._LockInstancesNodes()
5990

    
5991
  def BuildHooksEnv(self):
5992
    """Build hooks env.
5993

5994
    This runs on master, primary and secondary nodes of the instance.
5995

5996
    """
5997
    instance = self._migrater.instance
5998
    source_node = instance.primary_node
5999
    target_node = self._migrater.target_node
6000
    env = _BuildInstanceHookEnvByObject(self, instance)
6001
    env["MIGRATE_LIVE"] = self._migrater.live
6002
    env["MIGRATE_CLEANUP"] = self.op.cleanup
6003
    env.update({
6004
        "OLD_PRIMARY": source_node,
6005
        "NEW_PRIMARY": target_node,
6006
        })
6007

    
6008
    if instance.disk_template in constants.DTS_INT_MIRROR:
6009
      env["OLD_SECONDARY"] = target_node
6010
      env["NEW_SECONDARY"] = source_node
6011
    else:
6012
      env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6013

    
6014
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6015
    nl_post = list(nl)
6016
    nl_post.append(source_node)
6017
    return env, nl, nl_post
6018

    
6019

    
6020
class LUInstanceMove(LogicalUnit):
6021
  """Move an instance by data-copying.
6022

6023
  """
6024
  HPATH = "instance-move"
6025
  HTYPE = constants.HTYPE_INSTANCE
6026
  REQ_BGL = False
6027

    
6028
  def ExpandNames(self):
6029
    self._ExpandAndLockInstance()
6030
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6031
    self.op.target_node = target_node
6032
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
6033
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6034

    
6035
  def DeclareLocks(self, level):
6036
    if level == locking.LEVEL_NODE:
6037
      self._LockInstancesNodes(primary_only=True)
6038

    
6039
  def BuildHooksEnv(self):
6040
    """Build hooks env.
6041

6042
    This runs on master, primary and secondary nodes of the instance.
6043

6044
    """
6045
    env = {
6046
      "TARGET_NODE": self.op.target_node,
6047
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6048
      }
6049
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6050
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
6051
                                       self.op.target_node]
6052
    return env, nl, nl
6053

    
6054
  def CheckPrereq(self):
6055
    """Check prerequisites.
6056

6057
    This checks that the instance is in the cluster.
6058

6059
    """
6060
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6061
    assert self.instance is not None, \
6062
      "Cannot retrieve locked instance %s" % self.op.instance_name
6063

    
6064
    node = self.cfg.GetNodeInfo(self.op.target_node)
6065
    assert node is not None, \
6066
      "Cannot retrieve locked node %s" % self.op.target_node
6067

    
6068
    self.target_node = target_node = node.name
6069

    
6070
    if target_node == instance.primary_node:
6071
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
6072
                                 (instance.name, target_node),
6073
                                 errors.ECODE_STATE)
6074

    
6075
    bep = self.cfg.GetClusterInfo().FillBE(instance)
6076

    
6077
    for idx, dsk in enumerate(instance.disks):
6078
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6079
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6080
                                   " cannot copy" % idx, errors.ECODE_STATE)
6081

    
6082
    _CheckNodeOnline(self, target_node)
6083
    _CheckNodeNotDrained(self, target_node)
6084
    _CheckNodeVmCapable(self, target_node)
6085

    
6086
    if instance.admin_up:
6087
      # check memory requirements on the secondary node
6088
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6089
                           instance.name, bep[constants.BE_MEMORY],
6090
                           instance.hypervisor)
6091
    else:
6092
      self.LogInfo("Not checking memory on the secondary node as"
6093
                   " instance will not be started")
6094

    
6095
    # check bridge existence
6096
    _CheckInstanceBridgesExist(self, instance, node=target_node)
6097

    
6098
  def Exec(self, feedback_fn):
6099
    """Move an instance.
6100

6101
    The move is done by shutting it down on its present node, copying
6102
    the data over (slow) and starting it on the new node.
6103

6104
    """
6105
    instance = self.instance
6106

    
6107
    source_node = instance.primary_node
6108
    target_node = self.target_node
6109

    
6110
    self.LogInfo("Shutting down instance %s on source node %s",
6111
                 instance.name, source_node)
6112

    
6113
    result = self.rpc.call_instance_shutdown(source_node, instance,
6114
                                             self.op.shutdown_timeout)
6115
    msg = result.fail_msg
6116
    if msg:
6117
      if self.op.ignore_consistency:
6118
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
6119
                             " Proceeding anyway. Please make sure node"
6120
                             " %s is down. Error details: %s",
6121
                             instance.name, source_node, source_node, msg)
6122
      else:
6123
        raise errors.OpExecError("Could not shutdown instance %s on"
6124
                                 " node %s: %s" %
6125
                                 (instance.name, source_node, msg))
6126

    
6127
    # create the target disks
6128
    try:
6129
      _CreateDisks(self, instance, target_node=target_node)
6130
    except errors.OpExecError:
6131
      self.LogWarning("Device creation failed, reverting...")
6132
      try:
6133
        _RemoveDisks(self, instance, target_node=target_node)
6134
      finally:
6135
        self.cfg.ReleaseDRBDMinors(instance.name)
6136
        raise
6137

    
6138
    cluster_name = self.cfg.GetClusterInfo().cluster_name
6139

    
6140
    errs = []
6141
    # activate, get path, copy the data over
6142
    for idx, disk in enumerate(instance.disks):
6143
      self.LogInfo("Copying data for disk %d", idx)
6144
      result = self.rpc.call_blockdev_assemble(target_node, disk,
6145
                                               instance.name, True, idx)
6146
      if result.fail_msg:
6147
        self.LogWarning("Can't assemble newly created disk %d: %s",
6148
                        idx, result.fail_msg)
6149
        errs.append(result.fail_msg)
6150
        break
6151
      dev_path = result.payload
6152
      result = self.rpc.call_blockdev_export(source_node, disk,
6153
                                             target_node, dev_path,
6154
                                             cluster_name)
6155
      if result.fail_msg:
6156
        self.LogWarning("Can't copy data over for disk %d: %s",
6157
                        idx, result.fail_msg)
6158
        errs.append(result.fail_msg)
6159
        break
6160

    
6161
    if errs:
6162
      self.LogWarning("Some disks failed to copy, aborting")
6163
      try:
6164
        _RemoveDisks(self, instance, target_node=target_node)
6165
      finally:
6166
        self.cfg.ReleaseDRBDMinors(instance.name)
6167
        raise errors.OpExecError("Errors during disk copy: %s" %
6168
                                 (",".join(errs),))
6169

    
6170
    instance.primary_node = target_node
6171
    self.cfg.Update(instance, feedback_fn)
6172

    
6173
    self.LogInfo("Removing the disks on the original node")
6174
    _RemoveDisks(self, instance, target_node=source_node)
6175

    
6176
    # Only start the instance if it's marked as up
6177
    if instance.admin_up:
6178
      self.LogInfo("Starting instance %s on node %s",
6179
                   instance.name, target_node)
6180

    
6181
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
6182
                                           ignore_secondaries=True)
6183
      if not disks_ok:
6184
        _ShutdownInstanceDisks(self, instance)
6185
        raise errors.OpExecError("Can't activate the instance's disks")
6186

    
6187
      result = self.rpc.call_instance_start(target_node, instance, None, None)
6188
      msg = result.fail_msg
6189
      if msg:
6190
        _ShutdownInstanceDisks(self, instance)
6191
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6192
                                 (instance.name, target_node, msg))
6193

    
6194

    
6195
class LUNodeMigrate(LogicalUnit):
6196
  """Migrate all instances from a node.
6197

6198
  """
6199
  HPATH = "node-migrate"
6200
  HTYPE = constants.HTYPE_NODE
6201
  REQ_BGL = False
6202

    
6203
  def CheckArguments(self):
6204
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6205

    
6206
  def ExpandNames(self):
6207
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6208

    
6209
    self.needed_locks = {}
6210

    
6211
    # Create tasklets for migrating instances for all instances on this node
6212
    names = []
6213
    tasklets = []
6214

    
6215
    self.lock_all_nodes = False
6216

    
6217
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6218
      logging.debug("Migrating instance %s", inst.name)
6219
      names.append(inst.name)
6220

    
6221
      tasklets.append(TLMigrateInstance(self, inst.name, False,
6222
                                        self.op.iallocator, None))
6223

    
6224
      if inst.disk_template in constants.DTS_EXT_MIRROR:
6225
        # We need to lock all nodes, as the iallocator will choose the
6226
        # destination nodes afterwards
6227
        self.lock_all_nodes = True
6228

    
6229
    self.tasklets = tasklets
6230

    
6231
    # Declare node locks
6232
    if self.lock_all_nodes:
6233
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6234
    else:
6235
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6236
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6237

    
6238
    # Declare instance locks
6239
    self.needed_locks[locking.LEVEL_INSTANCE] = names
6240

    
6241
  def DeclareLocks(self, level):
6242
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6243
      self._LockInstancesNodes()
6244

    
6245
  def BuildHooksEnv(self):
6246
    """Build hooks env.
6247

6248
    This runs on the master, the primary and all the secondaries.
6249

6250
    """
6251
    env = {
6252
      "NODE_NAME": self.op.node_name,
6253
      }
6254

    
6255
    nl = [self.cfg.GetMasterNode()]
6256

    
6257
    return (env, nl, nl)
6258

    
6259

    
6260
class TLMigrateInstance(Tasklet):
6261
  """Tasklet class for instance migration.
6262

6263
  @type live: boolean
6264
  @ivar live: whether the migration will be done live or non-live;
6265
      this variable is initialized only after CheckPrereq has run
6266

6267
  """
6268
  def __init__(self, lu, instance_name, cleanup,
6269
               iallocator=None, target_node=None):
6270
    """Initializes this class.
6271

6272
    """
6273
    Tasklet.__init__(self, lu)
6274

    
6275
    # Parameters
6276
    self.instance_name = instance_name
6277
    self.cleanup = cleanup
6278
    self.live = False # will be overridden later
6279
    self.iallocator = iallocator
6280
    self.target_node = target_node
6281

    
6282
  def CheckPrereq(self):
6283
    """Check prerequisites.
6284

6285
    This checks that the instance is in the cluster.
6286

6287
    """
6288
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6289
    instance = self.cfg.GetInstanceInfo(instance_name)
6290
    assert instance is not None
6291
    self.instance = instance
6292

    
6293
    if instance.disk_template not in constants.DTS_MIRRORED:
6294
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6295
                                 " migrations" % instance.disk_template,
6296
                                 errors.ECODE_STATE)
6297

    
6298
    if instance.disk_template in constants.DTS_EXT_MIRROR:
6299
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6300

    
6301
      if self.iallocator:
6302
        self._RunAllocator()
6303

    
6304
      # self.target_node is already populated, either directly or by the
6305
      # iallocator run
6306
      target_node = self.target_node
6307

    
6308
      if len(self.lu.tasklets) == 1:
6309
        # It is safe to remove locks only when we're the only tasklet in the LU
6310
        nodes_keep = [instance.primary_node, self.target_node]
6311
        nodes_rel = [node for node in self.lu.acquired_locks[locking.LEVEL_NODE]
6312
                     if node not in nodes_keep]
6313
        self.lu.context.glm.release(locking.LEVEL_NODE, nodes_rel)
6314
        self.lu.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6315

    
6316
    else:
6317
      secondary_nodes = instance.secondary_nodes
6318
      if not secondary_nodes:
6319
        raise errors.ConfigurationError("No secondary node but using"
6320
                                        " %s disk template" %
6321
                                        instance.disk_template)
6322
      target_node = secondary_nodes[0]
6323
      if self.lu.op.iallocator or (self.lu.op.target_node and
6324
                                   self.lu.op.target_node != target_node):
6325
        raise errors.OpPrereqError("Instances with disk template %s cannot"
6326
                                   " be migrated over to arbitrary nodes"
6327
                                   " (neither an iallocator nor a target"
6328
                                   " node can be passed)" %
6329
                                   instance.disk_template, errors.ECODE_INVAL)
6330

    
6331
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
6332

    
6333
    # check memory requirements on the secondary node
6334
    _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6335
                         instance.name, i_be[constants.BE_MEMORY],
6336
                         instance.hypervisor)
6337

    
6338
    # check bridge existence
6339
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6340

    
6341
    if not self.cleanup:
6342
      _CheckNodeNotDrained(self.lu, target_node)
6343
      result = self.rpc.call_instance_migratable(instance.primary_node,
6344
                                                 instance)
6345
      result.Raise("Can't migrate, please use failover",
6346
                   prereq=True, ecode=errors.ECODE_STATE)
6347

    
6348

    
6349
  def _RunAllocator(self):
6350
    """Run the allocator based on input opcode.
6351

6352
    """
6353
    ial = IAllocator(self.cfg, self.rpc,
6354
                     mode=constants.IALLOCATOR_MODE_RELOC,
6355
                     name=self.instance_name,
6356
                     # TODO See why hail breaks with a single node below
6357
                     relocate_from=[self.instance.primary_node,
6358
                                    self.instance.primary_node],
6359
                     )
6360

    
6361
    ial.Run(self.iallocator)
6362

    
6363
    if not ial.success:
6364
      raise errors.OpPrereqError("Can't compute nodes using"
6365
                                 " iallocator '%s': %s" %
6366
                                 (self.iallocator, ial.info),
6367
                                 errors.ECODE_NORES)
6368
    if len(ial.result) != ial.required_nodes:
6369
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6370
                                 " of nodes (%s), required %s" %
6371
                                 (self.iallocator, len(ial.result),
6372
                                  ial.required_nodes), errors.ECODE_FAULT)
6373
    self.target_node = ial.result[0]
6374
    self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6375
                 self.instance_name, self.iallocator,
6376
                 utils.CommaJoin(ial.result))
6377

    
6378
    if self.lu.op.live is not None and self.lu.op.mode is not None:
6379
      raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6380
                                 " parameters are accepted",
6381
                                 errors.ECODE_INVAL)
6382
    if self.lu.op.live is not None:
6383
      if self.lu.op.live:
6384
        self.lu.op.mode = constants.HT_MIGRATION_LIVE
6385
      else:
6386
        self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6387
      # reset the 'live' parameter to None so that repeated
6388
      # invocations of CheckPrereq do not raise an exception
6389
      self.lu.op.live = None
6390
    elif self.lu.op.mode is None:
6391
      # read the default value from the hypervisor
6392
      i_hv = self.cfg.GetClusterInfo().FillHV(self.instance, skip_globals=False)
6393
      self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6394

    
6395
    self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6396

    
6397
  def _WaitUntilSync(self):
6398
    """Poll with custom rpc for disk sync.
6399

6400
    This uses our own step-based rpc call.
6401

6402
    """
6403
    self.feedback_fn("* wait until resync is done")
6404
    all_done = False
6405
    while not all_done:
6406
      all_done = True
6407
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6408
                                            self.nodes_ip,
6409
                                            self.instance.disks)
6410
      min_percent = 100
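      # track the least-synced node so the progress report is conservative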
6411
      for node, nres in result.items():
6412
        nres.Raise("Cannot resync disks on node %s" % node)
6413
        node_done, node_percent = nres.payload
6414
        all_done = all_done and node_done
6415
        if node_percent is not None:
6416
          min_percent = min(min_percent, node_percent)
6417
      if not all_done:
6418
        if min_percent < 100:
6419
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
6420
        time.sleep(2)
6421

    
6422
  def _EnsureSecondary(self, node):
6423
    """Demote a node to secondary.
6424

6425
    """
6426
    self.feedback_fn("* switching node %s to secondary mode" % node)
6427

    
6428
    for dev in self.instance.disks:
6429
      self.cfg.SetDiskID(dev, node)
6430

    
6431
    result = self.rpc.call_blockdev_close(node, self.instance.name,
6432
                                          self.instance.disks)
6433
    result.Raise("Cannot change disk to secondary on node %s" % node)
6434

    
6435
  def _GoStandalone(self):
6436
    """Disconnect from the network.
6437

6438
    """
6439
    self.feedback_fn("* changing into standalone mode")
6440
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6441
                                               self.instance.disks)
6442
    for node, nres in result.items():
6443
      nres.Raise("Cannot disconnect disks node %s" % node)
6444

    
6445
  def _GoReconnect(self, multimaster):
6446
    """Reconnect to the network.
6447

6448
    """
6449
    if multimaster:
6450
      msg = "dual-master"
6451
    else:
6452
      msg = "single-master"
6453
    self.feedback_fn("* changing disks into %s mode" % msg)
6454
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6455
                                           self.instance.disks,
6456
                                           self.instance.name, multimaster)
6457
    for node, nres in result.items():
6458
      nres.Raise("Cannot change disks config on node %s" % node)
6459

    
6460
  def _ExecCleanup(self):
6461
    """Try to cleanup after a failed migration.
6462

6463
    The cleanup is done by:
6464
      - check that the instance is running only on one node
6465
        (and update the config if needed)
6466
      - change disks on its secondary node to secondary
6467
      - wait until disks are fully synchronized
6468
      - disconnect from the network
6469
      - change disks into single-master mode
6470
      - wait again until disks are fully synchronized
6471

6472
    """
6473
    instance = self.instance
6474
    target_node = self.target_node
6475
    source_node = self.source_node
6476

    
6477
    # check running on only one node
6478
    self.feedback_fn("* checking where the instance actually runs"
6479
                     " (if this hangs, the hypervisor might be in"
6480
                     " a bad state)")
6481
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6482
    for node, result in ins_l.items():
6483
      result.Raise("Can't contact node %s" % node)
6484

    
6485
    runningon_source = instance.name in ins_l[source_node].payload
6486
    runningon_target = instance.name in ins_l[target_node].payload
6487

    
6488
    if runningon_source and runningon_target:
6489
      raise errors.OpExecError("Instance seems to be running on two nodes,"
6490
                               " or the hypervisor is confused. You will have"
6491
                               " to ensure manually that it runs only on one"
6492
                               " and restart this operation.")
6493

    
6494
    if not (runningon_source or runningon_target):
6495
      raise errors.OpExecError("Instance does not seem to be running at all."
6496
                               " In this case, it's safer to repair by"
6497
                               " running 'gnt-instance stop' to ensure disk"
6498
                               " shutdown, and then restarting it.")
6499

    
6500
    if runningon_target:
6501
      # the migration has actually succeeded, we need to update the config
6502
      self.feedback_fn("* instance running on secondary node (%s),"
6503
                       " updating config" % target_node)
6504
      instance.primary_node = target_node
6505
      self.cfg.Update(instance, self.feedback_fn)
6506
      demoted_node = source_node
6507
    else:
6508
      self.feedback_fn("* instance confirmed to be running on its"
6509
                       " primary node (%s)" % source_node)
6510
      demoted_node = target_node
6511

    
6512
    if instance.disk_template in constants.DTS_INT_MIRROR:
6513
      self._EnsureSecondary(demoted_node)
6514
      try:
6515
        self._WaitUntilSync()
6516
      except errors.OpExecError:
6517
        # we ignore errors here, since if the device is standalone, it
6518
        # won't be able to sync
6519
        pass
6520
      self._GoStandalone()
6521
      self._GoReconnect(False)
6522
      self._WaitUntilSync()
6523

    
6524
    self.feedback_fn("* done")
6525

    
6526
  def _RevertDiskStatus(self):
6527
    """Try to revert the disk status after a failed migration.
6528

6529
    """
6530
    target_node = self.target_node
6531
    if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6532
      return
6533

    
6534
    try:
6535
      self._EnsureSecondary(target_node)
6536
      self._GoStandalone()
6537
      self._GoReconnect(False)
6538
      self._WaitUntilSync()
6539
    except errors.OpExecError, err:
6540
      self.lu.LogWarning("Migration failed and I can't reconnect the"
6541
                         " drives: error '%s'\n"
6542
                         "Please look and recover the instance status" %
6543
                         str(err))
6544

    
6545
  def _AbortMigration(self):
6546
    """Call the hypervisor code to abort a started migration.
6547

6548
    """
6549
    instance = self.instance
6550
    target_node = self.target_node
6551
    migration_info = self.migration_info
6552

    
6553
    abort_result = self.rpc.call_finalize_migration(target_node,
6554
                                                    instance,
6555
                                                    migration_info,
6556
                                                    False)
6557
    abort_msg = abort_result.fail_msg
6558
    if abort_msg:
6559
      logging.error("Aborting migration failed on target node %s: %s",
6560
                    target_node, abort_msg)
6561
      # Don't raise an exception here, as we still have to try to revert the
6562
      # disk status, even if this step failed.
6563

    
6564
  def _ExecMigration(self):
6565
    """Migrate an instance.
6566

6567
    The migrate is done by:
6568
      - change the disks into dual-master mode
6569
      - wait until disks are fully synchronized again
6570
      - migrate the instance
6571
      - change disks on the new secondary node (the old primary) to secondary
6572
      - wait until disks are fully synchronized
6573
      - change disks into single-master mode
6574

6575
    """
6576
    instance = self.instance
6577
    target_node = self.target_node
6578
    source_node = self.source_node
6579

    
6580
    self.feedback_fn("* checking disk consistency between source and target")
6581
    for dev in instance.disks:
6582
      if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6583
        raise errors.OpExecError("Disk %s is degraded or not fully"
6584
                                 " synchronized on target node,"
6585
                                 " aborting migrate." % dev.iv_name)
6586

    
6587
    # First get the migration information from the remote node
6588
    result = self.rpc.call_migration_info(source_node, instance)
6589
    msg = result.fail_msg
6590
    if msg:
6591
      log_err = ("Failed fetching source migration information from %s: %s" %
6592
                 (source_node, msg))
6593
      logging.error(log_err)
6594
      raise errors.OpExecError(log_err)
6595

    
6596
    self.migration_info = migration_info = result.payload
6597

    
6598
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6599
      # Then switch the disks to master/master mode
6600
      self._EnsureSecondary(target_node)
6601
      self._GoStandalone()
6602
      self._GoReconnect(True)
6603
      self._WaitUntilSync()
6604

    
6605
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
6606
    result = self.rpc.call_accept_instance(target_node,
6607
                                           instance,
6608
                                           migration_info,
6609
                                           self.nodes_ip[target_node])
6610

    
6611
    msg = result.fail_msg
6612
    if msg:
6613
      logging.error("Instance pre-migration failed, trying to revert"
6614
                    " disk status: %s", msg)
6615
      self.feedback_fn("Pre-migration failed, aborting")
6616
      self._AbortMigration()
6617
      self._RevertDiskStatus()
6618
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6619
                               (instance.name, msg))
6620

    
6621
    self.feedback_fn("* migrating instance to %s" % target_node)
6622
    time.sleep(10)
6623
    result = self.rpc.call_instance_migrate(source_node, instance,
6624
                                            self.nodes_ip[target_node],
6625
                                            self.live)
6626
    msg = result.fail_msg
6627
    if msg:
6628
      logging.error("Instance migration failed, trying to revert"
6629
                    " disk status: %s", msg)
6630
      self.feedback_fn("Migration failed, aborting")
6631
      self._AbortMigration()
6632
      self._RevertDiskStatus()
6633
      raise errors.OpExecError("Could not migrate instance %s: %s" %
6634
                               (instance.name, msg))
6635
    time.sleep(10)
6636

    
6637
    instance.primary_node = target_node
6638
    # distribute new instance config to the other nodes
6639
    self.cfg.Update(instance, self.feedback_fn)
6640

    
6641
    result = self.rpc.call_finalize_migration(target_node,
6642
                                              instance,
6643
                                              migration_info,
6644
                                              True)
6645
    msg = result.fail_msg
6646
    if msg:
6647
      logging.error("Instance migration succeeded, but finalization failed:"
6648
                    " %s", msg)
6649
      raise errors.OpExecError("Could not finalize instance migration: %s" %
6650
                               msg)
6651

    
6652
    if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6653
      self._EnsureSecondary(source_node)
6654
      self._WaitUntilSync()
6655
      self._GoStandalone()
6656
      self._GoReconnect(False)
6657
      self._WaitUntilSync()
6658

    
6659
    self.feedback_fn("* done")
6660

    
6661
  def Exec(self, feedback_fn):
6662
    """Perform the migration.
6663

6664
    """
6665
    feedback_fn("Migrating instance %s" % self.instance.name)
6666

    
6667
    self.feedback_fn = feedback_fn
6668

    
6669
    self.source_node = self.instance.primary_node
6670

    
6671
    # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6672
    if self.instance.disk_template in constants.DTS_INT_MIRROR:
6673
      self.target_node = self.instance.secondary_nodes[0]
6674
      # Otherwise self.target_node has been populated either
6675
      # directly, or through an iallocator.
6676

    
6677
    self.all_nodes = [self.source_node, self.target_node]
6678
    self.nodes_ip = {
6679
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6680
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6681
      }
6682

    
6683
    if self.cleanup:
6684
      return self._ExecCleanup()
6685
    else:
6686
      return self._ExecMigration()
6687

    
6688

    
6689
def _CreateBlockDev(lu, node, instance, device, force_create,
6690
                    info, force_open):
6691
  """Create a tree of block devices on a given node.
6692

6693
  If this device type has to be created on secondaries, create it and
6694
  all its children.
6695

6696
  If not, just recurse to children keeping the same 'force' value.
6697

6698
  @param lu: the lu on whose behalf we execute
6699
  @param node: the node on which to create the device
6700
  @type instance: L{objects.Instance}
6701
  @param instance: the instance which owns the device
6702
  @type device: L{objects.Disk}
6703
  @param device: the device to create
6704
  @type force_create: boolean
6705
  @param force_create: whether to force creation of this device; this
6706
      will be changed to True whenever we find a device which has
6707
      CreateOnSecondary() attribute
6708
  @param info: the extra 'metadata' we should attach to the device
6709
      (this will be represented as an LVM tag)
6710
  @type force_open: boolean
6711
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
6715

6716
  """
6717
  if device.CreateOnSecondary():
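    # this device type must also exist on secondary nodes, so force its
    # creation (and that of its children) from here down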
6718
    force_create = True
6719

    
6720
  if device.children:
6721
    for child in device.children:
6722
      _CreateBlockDev(lu, node, instance, child, force_create,
6723
                      info, force_open)
6724

    
6725
  if not force_create:
6726
    return
6727

    
6728
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6729

    
6730

    
6731
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6732
  """Create a single block device on a given node.
6733

6734
  This will not recurse over children of the device, so they must be
6735
  created in advance.
6736

6737
  @param lu: the lu on whose behalf we execute
6738
  @param node: the node on which to create the device
6739
  @type instance: L{objects.Instance}
6740
  @param instance: the instance which owns the device
6741
  @type device: L{objects.Disk}
6742
  @param device: the device to create
6743
  @param info: the extra 'metadata' we should attach to the device
6744
      (this will be represented as an LVM tag)
6745
  @type force_open: boolean
6746
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution
6750

6751
  """
6752
  lu.cfg.SetDiskID(device, node)
6753
  result = lu.rpc.call_blockdev_create(node, device, device.size,
6754
                                       instance.name, force_open, info)
6755
  result.Raise("Can't create block device %s on"
6756
               " node %s for instance %s" % (device, node, instance.name))
6757
  if device.physical_id is None:
6758
    device.physical_id = result.payload
6759

    
6760

    
6761
def _GenerateUniqueNames(lu, exts):
6762
  """Generate a suitable LV name.
6763

6764
  This will generate a logical volume name for the given instance.
6765

6766
  """
6767
  results = []
6768
  for val in exts:
6769
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6770
    results.append("%s%s" % (new_id, val))
6771
  return results
6772

    
6773

    
6774
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6775
                         p_minor, s_minor):
6776
  """Generate a drbd8 device complete with its children.
6777

6778
  """
6779
  port = lu.cfg.AllocatePort()
6780
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6781
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6782
                          logical_id=(vgname, names[0]))
6783
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6784
                          logical_id=(vgname, names[1]))
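  # the DRBD8 device is stacked on top of the data and metadata LVs above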
6785
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6786
                          logical_id=(primary, secondary, port,
6787
                                      p_minor, s_minor,
6788
                                      shared_secret),
6789
                          children=[dev_data, dev_meta],
6790
                          iv_name=iv_name)
6791
  return drbd_dev
6792

    
6793

    
6794
def _GenerateDiskTemplate(lu, template_name,
6795
                          instance_name, primary_node,
6796
                          secondary_nodes, disk_info,
6797
                          file_storage_dir, file_driver,
6798
                          base_index, feedback_fn):
6799
  """Generate the entire disk layout for a given template type.
6800

6801
  """
6802
  #TODO: compute space requirements
6803

    
6804
  vgname = lu.cfg.GetVGName()
6805
  disk_count = len(disk_info)
6806
  disks = []
6807
  if template_name == constants.DT_DISKLESS:
6808
    pass
6809
  elif template_name == constants.DT_PLAIN:
6810
    if len(secondary_nodes) != 0:
6811
      raise errors.ProgrammerError("Wrong template configuration")
6812

    
6813
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6814
                                      for i in range(disk_count)])
6815
    for idx, disk in enumerate(disk_info):
6816
      disk_index = idx + base_index
6817
      vg = disk.get("vg", vgname)
6818
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6819
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6820
                              logical_id=(vg, names[idx]),
6821
                              iv_name="disk/%d" % disk_index,
6822
                              mode=disk["mode"])
6823
      disks.append(disk_dev)
6824
  elif template_name == constants.DT_DRBD8:
6825
    if len(secondary_nodes) != 1:
6826
      raise errors.ProgrammerError("Wrong template configuration")
6827
    remote_node = secondary_nodes[0]
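    # allocate one (primary, secondary) DRBD minor pair per disk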
6828
    minors = lu.cfg.AllocateDRBDMinor(
6829
      [primary_node, remote_node] * len(disk_info), instance_name)
6830

    
6831
    names = []
6832
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6833
                                               for i in range(disk_count)]):
6834
      names.append(lv_prefix + "_data")
6835
      names.append(lv_prefix + "_meta")
6836
    for idx, disk in enumerate(disk_info):
6837
      disk_index = idx + base_index
6838
      vg = disk.get("vg", vgname)
6839
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6840
                                      disk["size"], vg, names[idx*2:idx*2+2],
6841
                                      "disk/%d" % disk_index,
6842
                                      minors[idx*2], minors[idx*2+1])
6843
      disk_dev.mode = disk["mode"]
6844
      disks.append(disk_dev)
6845
  elif template_name == constants.DT_FILE:
6846
    if len(secondary_nodes) != 0:
6847
      raise errors.ProgrammerError("Wrong template configuration")
6848

    
6849
    opcodes.RequireFileStorage()
6850

    
6851
    for idx, disk in enumerate(disk_info):
6852
      disk_index = idx + base_index
6853
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6854
                              iv_name="disk/%d" % disk_index,
6855
                              logical_id=(file_driver,
6856
                                          "%s/disk%d" % (file_storage_dir,
6857
                                                         disk_index)),
6858
                              mode=disk["mode"])
6859
      disks.append(disk_dev)
6860
  elif template_name == constants.DT_SHARED_FILE:
6861
    if len(secondary_nodes) != 0:
6862
      raise errors.ProgrammerError("Wrong template configuration")
6863

    
6864
    opcodes.RequireSharedFileStorage()
6865

    
6866
    for idx, disk in enumerate(disk_info):
6867
      disk_index = idx + base_index
6868
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6869
                              iv_name="disk/%d" % disk_index,
6870
                              logical_id=(file_driver,
6871
                                          "%s/disk%d" % (file_storage_dir,
6872
                                                         disk_index)),
6873
                              mode=disk["mode"])
6874
      disks.append(disk_dev)
6875
  elif template_name == constants.DT_BLOCK:
6876
    if len(secondary_nodes) != 0:
6877
      raise errors.ProgrammerError("Wrong template configuration")
6878

    
6879
    for idx, disk in enumerate(disk_info):
6880
      disk_index = idx + base_index
6881
      disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV, size=disk["size"],
6882
                              logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
6883
                                          disk["adopt"]),
6884
                              iv_name="disk/%d" % disk_index,
6885
                              mode=disk["mode"])
6886
      disks.append(disk_dev)
6887

    
6888
  else:
6889
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6890
  return disks
6891

    
6892

    
6893
def _GetInstanceInfoText(instance):
6894
  """Compute that text that should be added to the disk's metadata.
6895

6896
  """
6897
  return "originstname+%s" % instance.name
6898

    
6899

    
6900
def _CalcEta(time_taken, written, total_size):
6901
  """Calculates the ETA based on size written and total size.
6902

6903
  @param time_taken: The time taken so far
6904
  @param written: amount written so far
6905
  @param total_size: The total size of data to be written
6906
  @return: The remaining time in seconds
6907

6908
  """
6909
  avg_time = time_taken / float(written)
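  # assume the average rate observed so far also holds for the remaining data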
6910
  return (total_size - written) * avg_time
6911

    
6912

    
6913
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: the success of the wipe

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warn("pause-sync of instance %s for disks %d failed",
                   instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s",
                   idx, instance.name, node)

      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk,
      # but at most MAX_WIPE_CHUNK
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)

      offset = 0
      size = device.size
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    for idx, success in enumerate(result.payload):
      if not success:
        lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
                      " look at the status and troubleshoot the issue.", idx)
        logging.warn("resume-sync of instance %s for disks %d failed",
                     instance.name, idx)


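# Illustrative example, not part of the original module: assuming the usual
# defaults of constants.MAX_WIPE_CHUNK = 1024 (MiB) and
# constants.MIN_WIPE_CHUNK_PERCENT = 10, a 20480 MiB disk is wiped in
# min(1024, 20480 * 0.10) = 1024 MiB chunks (20 wipe RPCs), while a 5000 MiB
# disk would use 500 MiB chunks.
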
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  def _compute(disks, payload):
    """Universal algorithm

    """
    vgs = {}
    for disk in disks:
      vgs[disk["vg"]] = vgs.get(disk["vg"], 0) + disk["size"] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


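# Illustrative example, not part of the original module: for two DRBD disks
# of 1024 MiB and 2048 MiB, both in a (hypothetical) volume group "xenvg",
# _ComputeDiskSizePerVG(constants.DT_DRBD8, disks) returns
# {"xenvg": (1024 + 128) + (2048 + 128)} == {"xenvg": 3328}, the extra
# 128 MiB per disk accounting for DRBD metadata.
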
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


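# Illustrative example, not part of the original module: for disks of
# 1024 MiB and 2048 MiB, _ComputeDiskSize(constants.DT_PLAIN, disks) returns
# 3072 and _ComputeDiskSize(constants.DT_DRBD8, disks) returns 3328, i.e. a
# single total rather than the per-VG dict of the helper above.
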
def _FilterVmNodes(lu, nodenames):
  """Filters out non-vm_capable nodes from a list.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @rtype: list
  @return: the list of vm-capable nodes

  """
  vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
  return [name for name in nodenames if name not in vm_nodes]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  nodenames = _FilterVmNodes(lu, nodenames)
  result = lu.rpc.call_os_validate(required, nodenames, osname,
                                   [constants.OS_VALIDATE_PARAMETERS],
                                   osparams)
  for node, nres in result.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    nres.Raise("OS Parameters validation failed on node %s" % node)
    if not nres.payload:
      lu.LogInfo("OS %s not found on node %s, validation skipped",
                 osname, node)


class LUInstanceCreate(LogicalUnit):
7198
  """Create an instance.
7199

7200
  """
7201
  HPATH = "instance-add"
7202
  HTYPE = constants.HTYPE_INSTANCE
7203
  REQ_BGL = False
7204

    
7205
  def CheckArguments(self):
7206
    """Check arguments.
7207

7208
    """
7209
    # do not require name_check to ease forward/backward compatibility
7210
    # for tools
7211
    if self.op.no_install and self.op.start:
7212
      self.LogInfo("No-installation mode selected, disabling startup")
7213
      self.op.start = False
7214
    # validate/normalize the instance name
7215
    self.op.instance_name = \
7216
      netutils.Hostname.GetNormalizedName(self.op.instance_name)
7217

    
7218
    if self.op.ip_check and not self.op.name_check:
7219
      # TODO: make the ip check more flexible and not depend on the name check
7220
      raise errors.OpPrereqError("Cannot do ip check without a name check",
7221
                                 errors.ECODE_INVAL)
7222

    
7223
    # check nics' parameter names
7224
    for nic in self.op.nics:
7225
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7226

    
7227
    # check disks. parameter names and consistent adopt/no-adopt strategy
7228
    has_adopt = has_no_adopt = False
7229
    for disk in self.op.disks:
7230
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7231
      if "adopt" in disk:
7232
        has_adopt = True
7233
      else:
7234
        has_no_adopt = True
7235
    if has_adopt and has_no_adopt:
7236
      raise errors.OpPrereqError("Either all disks are adopted or none is",
7237
                                 errors.ECODE_INVAL)
7238
    if has_adopt:
7239
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7240
        raise errors.OpPrereqError("Disk adoption is not supported for the"
7241
                                   " '%s' disk template" %
7242
                                   self.op.disk_template,
7243
                                   errors.ECODE_INVAL)
7244
      if self.op.iallocator is not None:
7245
        raise errors.OpPrereqError("Disk adoption not allowed with an"
7246
                                   " iallocator script", errors.ECODE_INVAL)
7247
      if self.op.mode == constants.INSTANCE_IMPORT:
7248
        raise errors.OpPrereqError("Disk adoption not allowed for"
7249
                                   " instance import", errors.ECODE_INVAL)
7250
    else:
7251
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
7252
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7253
                                   " but no 'adopt' parameter given" %
7254
                                   self.op.disk_template,
7255
                                   errors.ECODE_INVAL)
7256

    
7257
    self.adopt_disks = has_adopt
7258

    
7259
    # instance name verification
7260
    if self.op.name_check:
7261
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7262
      self.op.instance_name = self.hostname1.name
7263
      # used in CheckPrereq for ip ping check
7264
      self.check_ip = self.hostname1.ip
7265
    else:
7266
      self.check_ip = None
7267

    
7268
    # file storage checks
7269
    if (self.op.file_driver and
7270
        not self.op.file_driver in constants.FILE_DRIVER):
7271
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
7272
                                 self.op.file_driver, errors.ECODE_INVAL)
7273

    
7274
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7275
      raise errors.OpPrereqError("File storage directory path not absolute",
7276
                                 errors.ECODE_INVAL)
7277

    
7278
    ### Node/iallocator related checks
7279
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7280

    
7281
    if self.op.pnode is not None:
7282
      if self.op.disk_template in constants.DTS_INT_MIRROR:
7283
        if self.op.snode is None:
7284
          raise errors.OpPrereqError("The networked disk templates need"
7285
                                     " a mirror node", errors.ECODE_INVAL)
7286
      elif self.op.snode:
7287
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7288
                        " template")
7289
        self.op.snode = None
7290

    
7291
    self._cds = _GetClusterDomainSecret()
7292

    
7293
    if self.op.mode == constants.INSTANCE_IMPORT:
7294
      # On import force_variant must be True, because if we forced it at
7295
      # initial install, our only chance when importing it back is that it
7296
      # works again!
7297
      self.op.force_variant = True
7298

    
7299
      if self.op.no_install:
7300
        self.LogInfo("No-installation mode has no effect during import")
7301

    
7302
    elif self.op.mode == constants.INSTANCE_CREATE:
7303
      if self.op.os_type is None:
7304
        raise errors.OpPrereqError("No guest OS specified",
7305
                                   errors.ECODE_INVAL)
7306
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7307
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7308
                                   " installation" % self.op.os_type,
7309
                                   errors.ECODE_STATE)
7310
      if self.op.disk_template is None:
7311
        raise errors.OpPrereqError("No disk template specified",
7312
                                   errors.ECODE_INVAL)
7313

    
7314
    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7315
      # Check handshake to ensure both clusters have the same domain secret
7316
      src_handshake = self.op.source_handshake
7317
      if not src_handshake:
7318
        raise errors.OpPrereqError("Missing source handshake",
7319
                                   errors.ECODE_INVAL)
7320

    
7321
      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7322
                                                           src_handshake)
7323
      if errmsg:
7324
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7325
                                   errors.ECODE_INVAL)
7326

    
7327
      # Load and check source CA
7328
      self.source_x509_ca_pem = self.op.source_x509_ca
7329
      if not self.source_x509_ca_pem:
7330
        raise errors.OpPrereqError("Missing source X509 CA",
7331
                                   errors.ECODE_INVAL)
7332

    
7333
      try:
7334
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7335
                                                    self._cds)
7336
      except OpenSSL.crypto.Error, err:
7337
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7338
                                   (err, ), errors.ECODE_INVAL)
7339

    
7340
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7341
      if errcode is not None:
7342
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7343
                                   errors.ECODE_INVAL)
7344

    
7345
      self.source_x509_ca = cert
7346

    
7347
      src_instance_name = self.op.source_instance_name
7348
      if not src_instance_name:
7349
        raise errors.OpPrereqError("Missing source instance name",
7350
                                   errors.ECODE_INVAL)
7351

    
7352
      self.source_instance_name = \
7353
          netutils.GetHostname(name=src_instance_name).name
7354

    
7355
    else:
7356
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
7357
                                 self.op.mode, errors.ECODE_INVAL)
7358

    
7359
  def ExpandNames(self):
7360
    """ExpandNames for CreateInstance.
7361

7362
    Figure out the right locks for instance creation.
7363

7364
    """
7365
    self.needed_locks = {}
7366

    
7367
    instance_name = self.op.instance_name
7368
    # this is just a preventive check, but someone might still add this
7369
    # instance in the meantime, and creation will fail at lock-add time
7370
    if instance_name in self.cfg.GetInstanceList():
7371
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7372
                                 instance_name, errors.ECODE_EXISTS)
7373

    
7374
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7375

    
7376
    if self.op.iallocator:
7377
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7378
    else:
7379
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7380
      nodelist = [self.op.pnode]
7381
      if self.op.snode is not None:
7382
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7383
        nodelist.append(self.op.snode)
7384
      self.needed_locks[locking.LEVEL_NODE] = nodelist
7385

    
7386
    # in case of import lock the source node too
7387
    if self.op.mode == constants.INSTANCE_IMPORT:
7388
      src_node = self.op.src_node
7389
      src_path = self.op.src_path
7390

    
7391
      if src_path is None:
7392
        self.op.src_path = src_path = self.op.instance_name
7393

    
7394
      if src_node is None:
7395
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7396
        self.op.src_node = None
7397
        if os.path.isabs(src_path):
7398
          raise errors.OpPrereqError("Importing an instance from an absolute"
7399
                                     " path requires a source node option.",
7400
                                     errors.ECODE_INVAL)
7401
      else:
7402
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7403
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7404
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
7405
        if not os.path.isabs(src_path):
7406
          self.op.src_path = src_path = \
7407
            utils.PathJoin(constants.EXPORT_DIR, src_path)
7408

    
7409
  def _RunAllocator(self):
7410
    """Run the allocator based on input opcode.
7411

7412
    """
7413
    nics = [n.ToDict() for n in self.nics]
7414
    ial = IAllocator(self.cfg, self.rpc,
7415
                     mode=constants.IALLOCATOR_MODE_ALLOC,
7416
                     name=self.op.instance_name,
7417
                     disk_template=self.op.disk_template,
7418
                     tags=[],
7419
                     os=self.op.os_type,
7420
                     vcpus=self.be_full[constants.BE_VCPUS],
7421
                     mem_size=self.be_full[constants.BE_MEMORY],
7422
                     disks=self.disks,
7423
                     nics=nics,
7424
                     hypervisor=self.op.hypervisor,
7425
                     )
7426

    
7427
    ial.Run(self.op.iallocator)
7428

    
7429
    if not ial.success:
7430
      raise errors.OpPrereqError("Can't compute nodes using"
7431
                                 " iallocator '%s': %s" %
7432
                                 (self.op.iallocator, ial.info),
7433
                                 errors.ECODE_NORES)
7434
    if len(ial.result) != ial.required_nodes:
7435
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7436
                                 " of nodes (%s), required %s" %
7437
                                 (self.op.iallocator, len(ial.result),
7438
                                  ial.required_nodes), errors.ECODE_FAULT)
7439
    self.op.pnode = ial.result[0]
7440
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7441
                 self.op.instance_name, self.op.iallocator,
7442
                 utils.CommaJoin(ial.result))
7443
    if ial.required_nodes == 2:
7444
      self.op.snode = ial.result[1]
7445

    
7446
  def BuildHooksEnv(self):
7447
    """Build hooks env.
7448

7449
    This runs on master, primary and secondary nodes of the instance.
7450

7451
    """
7452
    env = {
7453
      "ADD_MODE": self.op.mode,
7454
      }
7455
    if self.op.mode == constants.INSTANCE_IMPORT:
7456
      env["SRC_NODE"] = self.op.src_node
7457
      env["SRC_PATH"] = self.op.src_path
7458
      env["SRC_IMAGES"] = self.src_images
7459

    
7460
    env.update(_BuildInstanceHookEnv(
7461
      name=self.op.instance_name,
7462
      primary_node=self.op.pnode,
7463
      secondary_nodes=self.secondaries,
7464
      status=self.op.start,
7465
      os_type=self.op.os_type,
7466
      memory=self.be_full[constants.BE_MEMORY],
7467
      vcpus=self.be_full[constants.BE_VCPUS],
7468
      nics=_NICListToTuple(self, self.nics),
7469
      disk_template=self.op.disk_template,
7470
      disks=[(d["size"], d["mode"]) for d in self.disks],
7471
      bep=self.be_full,
7472
      hvp=self.hv_full,
7473
      hypervisor_name=self.op.hypervisor,
7474
    ))
7475

    
7476
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7477
          self.secondaries)
7478
    return env, nl, nl
7479

    
7480
  def _ReadExportInfo(self):
7481
    """Reads the export information from disk.
7482

7483
    It will override the opcode source node and path with the actual
7484
    information, if these two were not specified before.
7485

7486
    @return: the export information
7487

7488
    """
7489
    assert self.op.mode == constants.INSTANCE_IMPORT
7490

    
7491
    src_node = self.op.src_node
7492
    src_path = self.op.src_path
7493

    
7494
    if src_node is None:
7495
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7496
      exp_list = self.rpc.call_export_list(locked_nodes)
7497
      found = False
7498
      for node in exp_list:
7499
        if exp_list[node].fail_msg:
7500
          continue
7501
        if src_path in exp_list[node].payload:
7502
          found = True
7503
          self.op.src_node = src_node = node
7504
          self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7505
                                                       src_path)
7506
          break
7507
      if not found:
7508
        raise errors.OpPrereqError("No export found for relative path %s" %
7509
                                    src_path, errors.ECODE_INVAL)
7510

    
7511
    _CheckNodeOnline(self, src_node)
7512
    result = self.rpc.call_export_info(src_node, src_path)
7513
    result.Raise("No export or invalid export found in dir %s" % src_path)
7514

    
7515
    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7516
    if not export_info.has_section(constants.INISECT_EXP):
7517
      raise errors.ProgrammerError("Corrupted export config",
7518
                                   errors.ECODE_ENVIRON)
7519

    
7520
    ei_version = export_info.get(constants.INISECT_EXP, "version")
7521
    if (int(ei_version) != constants.EXPORT_VERSION):
7522
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7523
                                 (ei_version, constants.EXPORT_VERSION),
7524
                                 errors.ECODE_ENVIRON)
7525
    return export_info
7526

    
7527
  def _ReadExportParams(self, einfo):
7528
    """Use export parameters as defaults.
7529

7530
    In case the opcode doesn't specify (as in override) some instance
7531
    parameters, then try to use them from the export information, if
7532
    that declares them.
7533

7534
    """
7535
    self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7536

    
7537
    if self.op.disk_template is None:
7538
      if einfo.has_option(constants.INISECT_INS, "disk_template"):
7539
        self.op.disk_template = einfo.get(constants.INISECT_INS,
7540
                                          "disk_template")
7541
      else:
7542
        raise errors.OpPrereqError("No disk template specified and the export"
7543
                                   " is missing the disk_template information",
7544
                                   errors.ECODE_INVAL)
7545

    
7546
    if not self.op.disks:
7547
      if einfo.has_option(constants.INISECT_INS, "disk_count"):
7548
        disks = []
7549
        # TODO: import the disk iv_name too
7550
        for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7551
          disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7552
          disks.append({"size": disk_sz})
7553
        self.op.disks = disks
7554
      else:
7555
        raise errors.OpPrereqError("No disk info specified and the export"
7556
                                   " is missing the disk information",
7557
                                   errors.ECODE_INVAL)
7558

    
7559
    if (not self.op.nics and
7560
        einfo.has_option(constants.INISECT_INS, "nic_count")):
7561
      nics = []
7562
      for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7563
        ndict = {}
7564
        for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7565
          v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7566
          ndict[name] = v
7567
        nics.append(ndict)
7568
      self.op.nics = nics
7569

    
7570
    if (self.op.hypervisor is None and
7571
        einfo.has_option(constants.INISECT_INS, "hypervisor")):
7572
      self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7573
    if einfo.has_section(constants.INISECT_HYP):
7574
      # use the export parameters but do not override the ones
7575
      # specified by the user
7576
      for name, value in einfo.items(constants.INISECT_HYP):
7577
        if name not in self.op.hvparams:
7578
          self.op.hvparams[name] = value
7579

    
7580
    if einfo.has_section(constants.INISECT_BEP):
7581
      # use the parameters, without overriding
7582
      for name, value in einfo.items(constants.INISECT_BEP):
7583
        if name not in self.op.beparams:
7584
          self.op.beparams[name] = value
7585
    else:
7586
      # try to read the parameters old style, from the main section
7587
      for name in constants.BES_PARAMETERS:
7588
        if (name not in self.op.beparams and
7589
            einfo.has_option(constants.INISECT_INS, name)):
7590
          self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7591

    
7592
    if einfo.has_section(constants.INISECT_OSP):
7593
      # use the parameters, without overriding
7594
      for name, value in einfo.items(constants.INISECT_OSP):
7595
        if name not in self.op.osparams:
7596
          self.op.osparams[name] = value
7597

    
7598
  def _RevertToDefaults(self, cluster):
7599
    """Revert the instance parameters to the default values.
7600

7601
    """
7602
    # hvparams
7603
    hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7604
    for name in self.op.hvparams.keys():
7605
      if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7606
        del self.op.hvparams[name]
7607
    # beparams
7608
    be_defs = cluster.SimpleFillBE({})
7609
    for name in self.op.beparams.keys():
7610
      if name in be_defs and be_defs[name] == self.op.beparams[name]:
7611
        del self.op.beparams[name]
7612
    # nic params
7613
    nic_defs = cluster.SimpleFillNIC({})
7614
    for nic in self.op.nics:
7615
      for name in constants.NICS_PARAMETERS:
7616
        if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7617
          del nic[name]
7618
    # osparams
7619
    os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7620
    for name in self.op.osparams.keys():
7621
      if name in os_defs and os_defs[name] == self.op.osparams[name]:
7622
        del self.op.osparams[name]
7623

    
7624
  def CheckPrereq(self):
7625
    """Check prerequisites.
7626

7627
    """
7628
    if self.op.mode == constants.INSTANCE_IMPORT:
7629
      export_info = self._ReadExportInfo()
7630
      self._ReadExportParams(export_info)
7631

    
7632
    if (not self.cfg.GetVGName() and
7633
        self.op.disk_template not in constants.DTS_NOT_LVM):
7634
      raise errors.OpPrereqError("Cluster does not support lvm-based"
7635
                                 " instances", errors.ECODE_STATE)
7636

    
7637
    if self.op.hypervisor is None:
7638
      self.op.hypervisor = self.cfg.GetHypervisorType()
7639

    
7640
    cluster = self.cfg.GetClusterInfo()
7641
    enabled_hvs = cluster.enabled_hypervisors
7642
    if self.op.hypervisor not in enabled_hvs:
7643
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7644
                                 " cluster (%s)" % (self.op.hypervisor,
7645
                                  ",".join(enabled_hvs)),
7646
                                 errors.ECODE_STATE)
7647

    
7648
    # check hypervisor parameter syntax (locally)
7649
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7650
    filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7651
                                      self.op.hvparams)
7652
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7653
    hv_type.CheckParameterSyntax(filled_hvp)
7654
    self.hv_full = filled_hvp
7655
    # check that we don't specify global parameters on an instance
7656
    _CheckGlobalHvParams(self.op.hvparams)
7657

    
7658
    # fill and remember the beparams dict
7659
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7660
    self.be_full = cluster.SimpleFillBE(self.op.beparams)
7661

    
7662
    # build os parameters
7663
    self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7664

    
7665
    # now that hvp/bep are in final format, let's reset to defaults,
7666
    # if told to do so
7667
    if self.op.identify_defaults:
7668
      self._RevertToDefaults(cluster)
7669

    
7670
    # NIC buildup
7671
    self.nics = []
7672
    for idx, nic in enumerate(self.op.nics):
7673
      nic_mode_req = nic.get("mode", None)
7674
      nic_mode = nic_mode_req
7675
      if nic_mode is None:
7676
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7677

    
7678
      # in routed mode, for the first nic, the default ip is 'auto'
7679
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7680
        default_ip_mode = constants.VALUE_AUTO
7681
      else:
7682
        default_ip_mode = constants.VALUE_NONE
7683

    
7684
      # ip validity checks
7685
      ip = nic.get("ip", default_ip_mode)
7686
      if ip is None or ip.lower() == constants.VALUE_NONE:
7687
        nic_ip = None
7688
      elif ip.lower() == constants.VALUE_AUTO:
7689
        if not self.op.name_check:
7690
          raise errors.OpPrereqError("IP address set to auto but name checks"
7691
                                     " have been skipped",
7692
                                     errors.ECODE_INVAL)
7693
        nic_ip = self.hostname1.ip
7694
      else:
7695
        if not netutils.IPAddress.IsValid(ip):
7696
          raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7697
                                     errors.ECODE_INVAL)
7698
        nic_ip = ip
7699

    
7700
      # TODO: check the ip address for uniqueness
7701
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7702
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
7703
                                   errors.ECODE_INVAL)
7704

    
7705
      # MAC address verification
7706
      mac = nic.get("mac", constants.VALUE_AUTO)
7707
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7708
        mac = utils.NormalizeAndValidateMac(mac)
7709

    
7710
        try:
7711
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
7712
        except errors.ReservationError:
7713
          raise errors.OpPrereqError("MAC address %s already in use"
7714
                                     " in cluster" % mac,
7715
                                     errors.ECODE_NOTUNIQUE)
7716

    
7717
      #  Build nic parameters
7718
      link = nic.get(constants.INIC_LINK, None)
7719
      nicparams = {}
7720
      if nic_mode_req:
7721
        nicparams[constants.NIC_MODE] = nic_mode_req
7722
      if link:
7723
        nicparams[constants.NIC_LINK] = link
7724

    
7725
      check_params = cluster.SimpleFillNIC(nicparams)
7726
      objects.NIC.CheckParameterSyntax(check_params)
7727
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7728

    
7729
    # disk checks/pre-build
7730
    self.disks = []
7731
    for disk in self.op.disks:
7732
      mode = disk.get("mode", constants.DISK_RDWR)
7733
      if mode not in constants.DISK_ACCESS_SET:
7734
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7735
                                   mode, errors.ECODE_INVAL)
7736
      size = disk.get("size", None)
7737
      if size is None:
7738
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7739
      try:
7740
        size = int(size)
7741
      except (TypeError, ValueError):
7742
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7743
                                   errors.ECODE_INVAL)
7744
      vg = disk.get("vg", self.cfg.GetVGName())
7745
      new_disk = {"size": size, "mode": mode, "vg": vg}
7746
      if "adopt" in disk:
7747
        new_disk["adopt"] = disk["adopt"]
7748
      self.disks.append(new_disk)
7749

    
7750
    if self.op.mode == constants.INSTANCE_IMPORT:
7751

    
7752
      # Check that the new instance doesn't have less disks than the export
7753
      instance_disks = len(self.disks)
7754
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7755
      if instance_disks < export_disks:
7756
        raise errors.OpPrereqError("Not enough disks to import."
7757
                                   " (instance: %d, export: %d)" %
7758
                                   (instance_disks, export_disks),
7759
                                   errors.ECODE_INVAL)
7760

    
7761
      disk_images = []
7762
      for idx in range(export_disks):
7763
        option = 'disk%d_dump' % idx
7764
        if export_info.has_option(constants.INISECT_INS, option):
7765
          # FIXME: are the old os-es, disk sizes, etc. useful?
7766
          export_name = export_info.get(constants.INISECT_INS, option)
7767
          image = utils.PathJoin(self.op.src_path, export_name)
7768
          disk_images.append(image)
7769
        else:
7770
          disk_images.append(False)
7771

    
7772
      self.src_images = disk_images
7773

    
7774
      old_name = export_info.get(constants.INISECT_INS, 'name')
7775
      try:
7776
        exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7777
      except (TypeError, ValueError), err:
7778
        raise errors.OpPrereqError("Invalid export file, nic_count is not"
7779
                                   " an integer: %s" % str(err),
7780
                                   errors.ECODE_STATE)
7781
      if self.op.instance_name == old_name:
7782
        for idx, nic in enumerate(self.nics):
7783
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7784
            nic_mac_ini = 'nic%d_mac' % idx
7785
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7786

    
7787
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7788

    
7789
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
7790
    if self.op.ip_check:
7791
      if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7792
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
7793
                                   (self.check_ip, self.op.instance_name),
7794
                                   errors.ECODE_NOTUNIQUE)
7795

    
7796
    #### mac address generation
7797
    # By generating here the mac address both the allocator and the hooks get
7798
    # the real final mac address rather than the 'auto' or 'generate' value.
7799
    # There is a race condition between the generation and the instance object
7800
    # creation, which means that we know the mac is valid now, but we're not
7801
    # sure it will be when we actually add the instance. If things go bad
7802
    # adding the instance will abort because of a duplicate mac, and the
7803
    # creation job will fail.
7804
    for nic in self.nics:
7805
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7806
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7807

    
7808
    #### allocator run
7809

    
7810
    if self.op.iallocator is not None:
7811
      self._RunAllocator()
7812

    
7813
    #### node related checks
7814

    
7815
    # check primary node
7816
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7817
    assert self.pnode is not None, \
7818
      "Cannot retrieve locked node %s" % self.op.pnode
7819
    if pnode.offline:
7820
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7821
                                 pnode.name, errors.ECODE_STATE)
7822
    if pnode.drained:
7823
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7824
                                 pnode.name, errors.ECODE_STATE)
7825
    if not pnode.vm_capable:
7826
      raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7827
                                 " '%s'" % pnode.name, errors.ECODE_STATE)
7828

    
7829
    self.secondaries = []
7830

    
7831
    # mirror node verification
7832
    if self.op.disk_template in constants.DTS_INT_MIRROR:
7833
      if self.op.snode == pnode.name:
7834
        raise errors.OpPrereqError("The secondary node cannot be the"
7835
                                   " primary node.", errors.ECODE_INVAL)
7836
      _CheckNodeOnline(self, self.op.snode)
7837
      _CheckNodeNotDrained(self, self.op.snode)
7838
      _CheckNodeVmCapable(self, self.op.snode)
7839
      self.secondaries.append(self.op.snode)
7840

    
7841
    nodenames = [pnode.name] + self.secondaries
7842

    
7843
    if not self.adopt_disks:
7844
      # Check lv size requirements, if not adopting
7845
      req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7846
      _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7847

    
7848
    elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
7849
      all_lvs = set([i["vg"] + "/" + i["adopt"] for i in self.disks])
7850
      if len(all_lvs) != len(self.disks):
7851
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
7852
                                   errors.ECODE_INVAL)
7853
      for lv_name in all_lvs:
7854
        try:
7855
          # FIXME: lv_name here is "vg/lv" need to ensure that other calls
7856
          # to ReserveLV uses the same syntax
7857
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7858
        except errors.ReservationError:
7859
          raise errors.OpPrereqError("LV named %s used by another instance" %
7860
                                     lv_name, errors.ECODE_NOTUNIQUE)
7861

    
7862
      vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
7863
      vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
7864

    
7865
      node_lvs = self.rpc.call_lv_list([pnode.name],
7866
                                       vg_names.payload.keys())[pnode.name]
7867
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7868
      node_lvs = node_lvs.payload
7869

    
7870
      delta = all_lvs.difference(node_lvs.keys())
7871
      if delta:
7872
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
7873
                                   utils.CommaJoin(delta),
7874
                                   errors.ECODE_INVAL)
7875
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7876
      if online_lvs:
7877
        raise errors.OpPrereqError("Online logical volumes found, cannot"
7878
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
7879
                                   errors.ECODE_STATE)
7880
      # update the size of disk based on what is found
7881
      for dsk in self.disks:
7882
        dsk["size"] = int(float(node_lvs[dsk["vg"] + "/" + dsk["adopt"]][0]))
7883

    
7884
    elif self.op.disk_template == constants.DT_BLOCK:
7885
      # Normalize and de-duplicate device paths
7886
      all_disks = set([os.path.abspath(i["adopt"]) for i in self.disks])
7887
      if len(all_disks) != len(self.disks):
7888
        raise errors.OpPrereqError("Duplicate disk names given for adoption",
7889
                                   errors.ECODE_INVAL)
7890
      baddisks = [d for d in all_disks
7891
                  if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
7892
      if baddisks:
7893
        raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
7894
                                   " cannot be adopted" %
7895
                                   (", ".join(baddisks),
7896
                                    constants.ADOPTABLE_BLOCKDEV_ROOT),
7897
                                   errors.ECODE_INVAL)
7898

    
7899
      node_disks = self.rpc.call_bdev_sizes([pnode.name],
7900
                                            list(all_disks))[pnode.name]
7901
      node_disks.Raise("Cannot get block device information from node %s" %
7902
                       pnode.name)
7903
      node_disks = node_disks.payload
7904
      delta = all_disks.difference(node_disks.keys())
7905
      if delta:
7906
        raise errors.OpPrereqError("Missing block device(s): %s" %
7907
                                   utils.CommaJoin(delta),
7908
                                   errors.ECODE_INVAL)
7909
      for dsk in self.disks:
7910
        dsk["size"] = int(float(node_disks[dsk["adopt"]]))
7911

    
7912
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7913

    
7914
    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7915
    # check OS parameters (remotely)
7916
    _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7917

    
7918
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7919

    
7920
    # memory check on primary node
7921
    if self.op.start:
7922
      _CheckNodeFreeMemory(self, self.pnode.name,
7923
                           "creating instance %s" % self.op.instance_name,
7924
                           self.be_full[constants.BE_MEMORY],
7925
                           self.op.hypervisor)
7926

    
7927
    self.dry_run_result = list(nodenames)
7928

    
7929
  def Exec(self, feedback_fn):
7930
    """Create and add the instance to the cluster.
7931

7932
    """
7933
    instance = self.op.instance_name
7934
    pnode_name = self.pnode.name
7935

    
7936
    ht_kind = self.op.hypervisor
7937
    if ht_kind in constants.HTS_REQ_PORT:
7938
      network_port = self.cfg.AllocatePort()
7939
    else:
7940
      network_port = None
7941

    
7942
    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
7943
      # this is needed because os.path.join does not accept None arguments
7944
      if self.op.file_storage_dir is None:
7945
        string_file_storage_dir = ""
7946
      else:
7947
        string_file_storage_dir = self.op.file_storage_dir
7948

    
7949
      # build the full file storage dir path
7950
      if self.op.disk_template == constants.DT_SHARED_FILE:
7951
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
7952
      else:
7953
        get_fsd_fn = self.cfg.GetFileStorageDir
7954

    
7955
      file_storage_dir = utils.PathJoin(get_fsd_fn(),
7956
                                        string_file_storage_dir, instance)
7957
    else:
7958
      file_storage_dir = ""
7959

    
7960
    disks = _GenerateDiskTemplate(self,
7961
                                  self.op.disk_template,
7962
                                  instance, pnode_name,
7963
                                  self.secondaries,
7964
                                  self.disks,
7965
                                  file_storage_dir,
7966
                                  self.op.file_driver,
7967
                                  0,
7968
                                  feedback_fn)
7969

    
7970
    iobj = objects.Instance(name=instance, os=self.op.os_type,
7971
                            primary_node=pnode_name,
7972
                            nics=self.nics, disks=disks,
7973
                            disk_template=self.op.disk_template,
7974
                            admin_up=False,
7975
                            network_port=network_port,
7976
                            beparams=self.op.beparams,
7977
                            hvparams=self.op.hvparams,
7978
                            hypervisor=self.op.hypervisor,
7979
                            osparams=self.op.osparams,
7980
                            )
7981

    
7982
    if self.adopt_disks:
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip(tmp_disks, self.disks):
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adopted LVs")
    else:
7996
      feedback_fn("* creating instance disks...")
7997
      try:
7998
        _CreateDisks(self, iobj)
7999
      except errors.OpExecError:
8000
        self.LogWarning("Device creation failed, reverting...")
8001
        try:
8002
          _RemoveDisks(self, iobj)
8003
        finally:
8004
          self.cfg.ReleaseDRBDMinors(instance)
8005
          raise
8006

    
8007
      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
8008
        feedback_fn("* wiping instance disks...")
8009
        try:
8010
          _WipeDisks(self, iobj)
8011
        except errors.OpExecError:
8012
          self.LogWarning("Device wiping failed, reverting...")
8013
          try:
8014
            _RemoveDisks(self, iobj)
8015
          finally:
8016
            self.cfg.ReleaseDRBDMinors(instance)
8017
            raise
8018

    
8019
    feedback_fn("adding instance %s to cluster config" % instance)
8020

    
8021
    self.cfg.AddInstance(iobj, self.proc.GetECId())
8022

    
8023
    # Declare that we don't want to remove the instance lock anymore, as we've
8024
    # added the instance to the config
8025
    del self.remove_locks[locking.LEVEL_INSTANCE]
8026
    # Unlock all the nodes
8027
    if self.op.mode == constants.INSTANCE_IMPORT:
8028
      nodes_keep = [self.op.src_node]
8029
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
8030
                       if node != self.op.src_node]
8031
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
8032
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
8033
    else:
8034
      self.context.glm.release(locking.LEVEL_NODE)
8035
      del self.acquired_locks[locking.LEVEL_NODE]
8036

    
8037
    if self.op.wait_for_sync:
8038
      disk_abort = not _WaitForSync(self, iobj)
8039
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
8040
      # make sure the disks are not degraded (still sync-ing is ok)
8041
      time.sleep(15)
8042
      feedback_fn("* checking mirrors status")
8043
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8044
    else:
8045
      disk_abort = False
8046

    
8047
    if disk_abort:
8048
      _RemoveDisks(self, iobj)
8049
      self.cfg.RemoveInstance(iobj.name)
8050
      # Make sure the instance lock gets removed
8051
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8052
      raise errors.OpExecError("There are some degraded disks for"
8053
                               " this instance")
8054

    
8055
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8056
      if self.op.mode == constants.INSTANCE_CREATE:
8057
        if not self.op.no_install:
8058
          feedback_fn("* running the instance OS create scripts...")
8059
          # FIXME: pass debug option from opcode to backend
8060
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8061
                                                 self.op.debug_level)
8062
          result.Raise("Could not add os for instance %s"
8063
                       " on node %s" % (instance, pnode_name))
8064

    
8065
      elif self.op.mode == constants.INSTANCE_IMPORT:
8066
        feedback_fn("* running the instance OS import scripts...")
8067

    
8068
        transfers = []
8069

    
8070
        for idx, image in enumerate(self.src_images):
8071
          if not image:
8072
            continue
8073

    
8074
          # FIXME: pass debug option from opcode to backend
8075
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8076
                                             constants.IEIO_FILE, (image, ),
8077
                                             constants.IEIO_SCRIPT,
8078
                                             (iobj.disks[idx], idx),
8079
                                             None)
8080
          transfers.append(dt)
8081

    
8082
        import_result = \
8083
          masterd.instance.TransferInstanceData(self, feedback_fn,
8084
                                                self.op.src_node, pnode_name,
8085
                                                self.pnode.secondary_ip,
8086
                                                iobj, transfers)
8087
        if not compat.all(import_result):
8088
          self.LogWarning("Some disks for instance %s on node %s were not"
8089
                          " imported successfully" % (instance, pnode_name))
8090

    
8091
      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8092
        feedback_fn("* preparing remote import...")
8093
        # The source cluster will stop the instance before attempting to make a
8094
        # connection. In some cases stopping an instance can take a long time,
8095
        # hence the shutdown timeout is added to the connection timeout.
8096
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8097
                           self.op.source_shutdown_timeout)
8098
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8099

    
8100
        assert iobj.primary_node == self.pnode.name
8101
        disk_results = \
8102
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8103
                                        self.source_x509_ca,
8104
                                        self._cds, timeouts)
8105
        if not compat.all(disk_results):
8106
          # TODO: Should the instance still be started, even if some disks
8107
          # failed to import (valid for local imports, too)?
8108
          self.LogWarning("Some disks for instance %s on node %s were not"
8109
                          " imported successfully" % (instance, pnode_name))
8110

    
8111
        # Run rename script on newly imported instance
8112
        assert iobj.name == instance
8113
        feedback_fn("Running rename script for %s" % instance)
8114
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8115
                                                   self.source_instance_name,
8116
                                                   self.op.debug_level)
8117
        if result.fail_msg:
8118
          self.LogWarning("Failed to run rename script for %s on node"
8119
                          " %s: %s" % (instance, pnode_name, result.fail_msg))
8120

    
8121
      else:
8122
        # also checked in the prereq part
8123
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8124
                                     % self.op.mode)
8125

    
8126
    if self.op.start:
8127
      iobj.admin_up = True
8128
      self.cfg.Update(iobj, feedback_fn)
8129
      logging.info("Starting instance %s on node %s", instance, pnode_name)
8130
      feedback_fn("* starting instance...")
8131
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8132
      result.Raise("Could not start instance")
8133

    
8134
    return list(iobj.all_nodes)
8135

    
8136

    
8137
class LUInstanceConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      if instance.admin_up:
        state = constants.INSTST_ERRORDOWN
      else:
        state = constants.INSTST_ADMINDOWN
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (instance.name, state))

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)


def _GetInstanceConsole(cluster, instance):
  """Returns console information for an instance.

  @type cluster: L{objects.Cluster}
  @type instance: L{objects.Instance}
  @rtype: dict

  """
  hyper = hypervisor.GetHypervisor(instance.hypervisor)
  # beparams and hvparams are passed separately, to avoid editing the
  # instance and then saving the defaults in the instance itself.
  hvparams = cluster.FillHV(instance)
  beparams = cluster.FillBE(instance)
  console = hyper.GetInstanceConsole(instance, hvparams, beparams)

  assert console.instance == instance.name
  assert console.Validate()

  return console.ToDict()


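# Illustrative sketch (not part of the Ganeti API): the comment in
# _GetInstanceConsole above notes that hvparams and beparams are filled from
# the cluster defaults instead of being written back into the instance
# object.  The helper below shows the same "defaults overlaid by per-instance
# overrides" idea with plain dictionaries; its name and arguments are made up
# for this example only.
def _ExampleFillParams(cluster_defaults, instance_overrides):
  """Overlays per-instance overrides on top of cluster defaults (sketch).

  The merged result is a new dictionary, so neither input is modified --
  which mirrors why FillHV/FillBE are used rather than editing the instance.

  """
  filled = dict(cluster_defaults)
  filled.update(instance_overrides)
  return filled

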
class LUInstanceReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


8275
  """Replaces disks for an instance.
8276

8277
  Note: Locking is not within the scope of this class.
8278

8279
  """
8280
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8281
               disks, delay_iallocator, early_release):
8282
    """Initializes this class.
8283

8284
    """
8285
    Tasklet.__init__(self, lu)
8286

    
8287
    # Parameters
8288
    self.instance_name = instance_name
8289
    self.mode = mode
8290
    self.iallocator_name = iallocator_name
8291
    self.remote_node = remote_node
8292
    self.disks = disks
8293
    self.delay_iallocator = delay_iallocator
8294
    self.early_release = early_release
8295

    
8296
    # Runtime data
8297
    self.instance = None
8298
    self.new_node = None
8299
    self.target_node = None
8300
    self.other_node = None
8301
    self.remote_node_info = None
8302
    self.node_secondary_ip = None
8303

    
8304
  @staticmethod
8305
  def CheckArguments(mode, remote_node, iallocator):
8306
    """Helper function for users of this class.
8307

8308
    """
8309
    # check for valid parameter combination
8310
    if mode == constants.REPLACE_DISK_CHG:
8311
      if remote_node is None and iallocator is None:
8312
        raise errors.OpPrereqError("When changing the secondary either an"
8313
                                   " iallocator script must be used or the"
8314
                                   " new node given", errors.ECODE_INVAL)
8315

    
8316
      if remote_node is not None and iallocator is not None:
8317
        raise errors.OpPrereqError("Give either the iallocator or the new"
8318
                                   " secondary, not both", errors.ECODE_INVAL)
8319

    
8320
    elif remote_node is not None or iallocator is not None:
8321
      # Not replacing the secondary
8322
      raise errors.OpPrereqError("The iallocator and new node options can"
8323
                                 " only be used when changing the"
8324
                                 " secondary node", errors.ECODE_INVAL)
8325

    
8326
  @staticmethod
8327
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8328
    """Compute a new secondary node using an IAllocator.
8329

8330
    """
8331
    ial = IAllocator(lu.cfg, lu.rpc,
8332
                     mode=constants.IALLOCATOR_MODE_RELOC,
8333
                     name=instance_name,
8334
                     relocate_from=relocate_from)
8335

    
8336
    ial.Run(iallocator_name)
8337

    
8338
    if not ial.success:
8339
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8340
                                 " %s" % (iallocator_name, ial.info),
8341
                                 errors.ECODE_NORES)
8342

    
8343
    if len(ial.result) != ial.required_nodes:
8344
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8345
                                 " of nodes (%s), required %s" %
8346
                                 (iallocator_name,
8347
                                  len(ial.result), ial.required_nodes),
8348
                                 errors.ECODE_FAULT)
8349

    
8350
    remote_node_name = ial.result[0]
8351

    
8352
    lu.LogInfo("Selected new secondary for instance '%s': %s",
8353
               instance_name, remote_node_name)
8354

    
8355
    return remote_node_name
8356

    
8357
  def _FindFaultyDisks(self, node_name):
8358
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8359
                                    node_name, True)
8360

    
8361
  def _CheckDisksActivated(self, instance):
8362
    """Checks if the instance disks are activated.
8363

8364
    @param instance: The instance to check disks
8365
    @return: True if they are activated, False otherwise
8366

8367
    """
8368
    nodes = instance.all_nodes
8369

    
8370
    for idx, dev in enumerate(instance.disks):
8371
      for node in nodes:
8372
        self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8373
        self.cfg.SetDiskID(dev, node)
8374

    
8375
        result = self.rpc.call_blockdev_find(node, dev)
8376

    
8377
        if result.offline:
8378
          continue
8379
        elif result.fail_msg or not result.payload:
8380
          return False
8381

    
8382
    return True
8383

    
8384

    
8385
  def CheckPrereq(self):
8386
    """Check prerequisites.
8387

8388
    This checks that the instance is in the cluster.
8389

8390
    """
8391
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8392
    assert instance is not None, \
8393
      "Cannot retrieve locked instance %s" % self.instance_name
8394

    
8395
    if instance.disk_template != constants.DT_DRBD8:
8396
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8397
                                 " instances", errors.ECODE_INVAL)
8398

    
8399
    if len(instance.secondary_nodes) != 1:
8400
      raise errors.OpPrereqError("The instance has a strange layout,"
8401
                                 " expected one secondary but found %d" %
8402
                                 len(instance.secondary_nodes),
8403
                                 errors.ECODE_FAULT)
8404

    
8405
    if not self.delay_iallocator:
8406
      self._CheckPrereq2()
8407

    
8408
  def _CheckPrereq2(self):
8409
    """Check prerequisites, second part.
8410

8411
    This function should always be part of CheckPrereq. It was separated and is
8412
    now called from Exec because during node evacuation iallocator was only
8413
    called with an unmodified cluster model, not taking planned changes into
8414
    account.
8415

8416
    """
8417
    instance = self.instance
8418
    secondary_node = instance.secondary_nodes[0]
8419

    
8420
    if self.iallocator_name is None:
8421
      remote_node = self.remote_node
8422
    else:
8423
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8424
                                       instance.name, instance.secondary_nodes)
8425

    
8426
    if remote_node is not None:
8427
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8428
      assert self.remote_node_info is not None, \
8429
        "Cannot retrieve locked node %s" % remote_node
8430
    else:
8431
      self.remote_node_info = None
8432

    
8433
    if remote_node == self.instance.primary_node:
8434
      raise errors.OpPrereqError("The specified node is the primary node of"
8435
                                 " the instance.", errors.ECODE_INVAL)
8436

    
8437
    if remote_node == secondary_node:
8438
      raise errors.OpPrereqError("The specified node is already the"
8439
                                 " secondary node of the instance.",
8440
                                 errors.ECODE_INVAL)
8441

    
8442
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8443
                                    constants.REPLACE_DISK_CHG):
8444
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
8445
                                 errors.ECODE_INVAL)
8446

    
8447
    if self.mode == constants.REPLACE_DISK_AUTO:
8448
      if not self._CheckDisksActivated(instance):
8449
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
8450
                                   " first" % self.instance_name,
8451
                                   errors.ECODE_STATE)
8452
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
8453
      faulty_secondary = self._FindFaultyDisks(secondary_node)
8454

    
8455
      if faulty_primary and faulty_secondary:
8456
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8457
                                   " one node and can not be repaired"
8458
                                   " automatically" % self.instance_name,
8459
                                   errors.ECODE_STATE)
8460

    
8461
      if faulty_primary:
8462
        self.disks = faulty_primary
8463
        self.target_node = instance.primary_node
8464
        self.other_node = secondary_node
8465
        check_nodes = [self.target_node, self.other_node]
8466
      elif faulty_secondary:
8467
        self.disks = faulty_secondary
8468
        self.target_node = secondary_node
8469
        self.other_node = instance.primary_node
8470
        check_nodes = [self.target_node, self.other_node]
8471
      else:
8472
        self.disks = []
8473
        check_nodes = []
8474

    
8475
    else:
8476
      # Non-automatic modes
8477
      if self.mode == constants.REPLACE_DISK_PRI:
8478
        self.target_node = instance.primary_node
8479
        self.other_node = secondary_node
8480
        check_nodes = [self.target_node, self.other_node]
8481

    
8482
      elif self.mode == constants.REPLACE_DISK_SEC:
8483
        self.target_node = secondary_node
8484
        self.other_node = instance.primary_node
8485
        check_nodes = [self.target_node, self.other_node]
8486

    
8487
      elif self.mode == constants.REPLACE_DISK_CHG:
8488
        self.new_node = remote_node
8489
        self.other_node = instance.primary_node
8490
        self.target_node = secondary_node
8491
        check_nodes = [self.new_node, self.other_node]
8492

    
8493
        _CheckNodeNotDrained(self.lu, remote_node)
8494
        _CheckNodeVmCapable(self.lu, remote_node)
8495

    
8496
        old_node_info = self.cfg.GetNodeInfo(secondary_node)
8497
        assert old_node_info is not None
8498
        if old_node_info.offline and not self.early_release:
8499
          # doesn't make sense to delay the release
8500
          self.early_release = True
8501
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8502
                          " early-release mode", secondary_node)
8503

    
8504
      else:
8505
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8506
                                     self.mode)
8507

    
8508
      # If not specified all disks should be replaced
8509
      if not self.disks:
8510
        self.disks = range(len(self.instance.disks))
8511

    
8512
    for node in check_nodes:
8513
      _CheckNodeOnline(self.lu, node)
8514

    
8515
    # Check whether disks are valid
8516
    for disk_idx in self.disks:
8517
      instance.FindDisk(disk_idx)
8518

    
8519
    # Get secondary node IP addresses
8520
    node_2nd_ip = {}
8521

    
8522
    for node_name in [self.target_node, self.other_node, self.new_node]:
8523
      if node_name is not None:
8524
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8525

    
8526
    self.node_secondary_ip = node_2nd_ip
8527

    
8528
  def Exec(self, feedback_fn):
8529
    """Execute disk replacement.
8530

8531
    This dispatches the disk replacement to the appropriate handler.
8532

8533
    """
8534
    if self.delay_iallocator:
8535
      self._CheckPrereq2()
8536

    
8537
    if not self.disks:
8538
      feedback_fn("No disks need replacement")
8539
      return
8540

    
8541
    feedback_fn("Replacing disk(s) %s for %s" %
8542
                (utils.CommaJoin(self.disks), self.instance.name))
8543

    
8544
    activate_disks = (not self.instance.admin_up)
8545

    
8546
    # Activate the instance disks if we're replacing them on a down instance
8547
    if activate_disks:
8548
      _StartInstanceDisks(self.lu, self.instance, True)
8549

    
8550
    try:
8551
      # Should we replace the secondary node?
8552
      if self.new_node is not None:
8553
        fn = self._ExecDrbd8Secondary
8554
      else:
8555
        fn = self._ExecDrbd8DiskOnly
8556

    
8557
      return fn(feedback_fn)
8558

    
8559
    finally:
8560
      # Deactivate the instance disks if we're replacing them on a
8561
      # down instance
8562
      if activate_disks:
8563
        _SafeShutdownInstanceDisks(self.lu, self.instance)
8564

    
8565
  def _CheckVolumeGroup(self, nodes):
8566
    self.lu.LogInfo("Checking volume groups")
8567

    
8568
    vgname = self.cfg.GetVGName()
8569

    
8570
    # Make sure volume group exists on all involved nodes
8571
    results = self.rpc.call_vg_list(nodes)
8572
    if not results:
8573
      raise errors.OpExecError("Can't list volume groups on the nodes")
8574

    
8575
    for node in nodes:
8576
      res = results[node]
8577
      res.Raise("Error checking node %s" % node)
8578
      if vgname not in res.payload:
8579
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
8580
                                 (vgname, node))
8581

    
8582
  def _CheckDisksExistence(self, nodes):
8583
    # Check disk existence
8584
    for idx, dev in enumerate(self.instance.disks):
8585
      if idx not in self.disks:
8586
        continue
8587

    
8588
      for node in nodes:
8589
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8590
        self.cfg.SetDiskID(dev, node)
8591

    
8592
        result = self.rpc.call_blockdev_find(node, dev)
8593

    
8594
        msg = result.fail_msg
8595
        if msg or not result.payload:
8596
          if not msg:
8597
            msg = "disk not found"
8598
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8599
                                   (idx, node, msg))
8600

    
8601
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8602
    for idx, dev in enumerate(self.instance.disks):
8603
      if idx not in self.disks:
8604
        continue
8605

    
8606
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8607
                      (idx, node_name))
8608

    
8609
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8610
                                   ldisk=ldisk):
8611
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8612
                                 " replace disks for instance %s" %
8613
                                 (node_name, self.instance.name))
8614

    
8615
  def _CreateNewStorage(self, node_name):
8616
    vgname = self.cfg.GetVGName()
8617
    iv_names = {}
8618

    
8619
    for idx, dev in enumerate(self.instance.disks):
8620
      if idx not in self.disks:
8621
        continue
8622

    
8623
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8624

    
8625
      self.cfg.SetDiskID(dev, node_name)
8626

    
8627
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8628
      names = _GenerateUniqueNames(self.lu, lv_names)
8629

    
8630
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8631
                             logical_id=(vgname, names[0]))
8632
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8633
                             logical_id=(vgname, names[1]))
8634

    
8635
      new_lvs = [lv_data, lv_meta]
8636
      old_lvs = dev.children
8637
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8638

    
8639
      # we pass force_create=True to force the LVM creation
8640
      for new_lv in new_lvs:
8641
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8642
                        _GetInstanceInfoText(self.instance), False)
8643

    
8644
    return iv_names
8645

    
8646
  def _CheckDevices(self, node_name, iv_names):
8647
    for name, (dev, _, _) in iv_names.iteritems():
8648
      self.cfg.SetDiskID(dev, node_name)
8649

    
8650
      result = self.rpc.call_blockdev_find(node_name, dev)
8651

    
8652
      msg = result.fail_msg
8653
      if msg or not result.payload:
8654
        if not msg:
8655
          msg = "disk not found"
8656
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
8657
                                 (name, msg))
8658

    
8659
      if result.payload.is_degraded:
8660
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
8661

    
8662
  def _RemoveOldStorage(self, node_name, iv_names):
8663
    for name, (_, old_lvs, _) in iv_names.iteritems():
8664
      self.lu.LogInfo("Remove logical volumes for %s" % name)
8665

    
8666
      for lv in old_lvs:
8667
        self.cfg.SetDiskID(lv, node_name)
8668

    
8669
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8670
        if msg:
8671
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
8672
                             hint="remove unused LVs manually")
8673

    
8674
  def _ReleaseNodeLock(self, node_name):
8675
    """Releases the lock for a given node."""
8676
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8677

    
8678
  def _ExecDrbd8DiskOnly(self, feedback_fn):
8679
    """Replace a disk on the primary or secondary for DRBD 8.
8680

8681
    The algorithm for replace is quite complicated:
8682

8683
      1. for each disk to be replaced:
8684

8685
        1. create new LVs on the target node with unique names
8686
        1. detach old LVs from the drbd device
8687
        1. rename old LVs to name_replaced.<time_t>
8688
        1. rename new LVs to old LVs
8689
        1. attach the new LVs (with the old names now) to the drbd device
8690

8691
      1. wait for sync across all devices
8692

8693
      1. for each modified disk:
8694

8695
        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
8781
                                     "volumes"))
8782
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8783

    
8784
      dev.children = new_lvs
8785

    
8786
      self.cfg.Update(self.instance, feedback_fn)
8787

    
8788
    cstep = 5
8789
    if self.early_release:
8790
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8791
      cstep += 1
8792
      self._RemoveOldStorage(self.target_node, iv_names)
8793
      # WARNING: we release both node locks here, do not do other RPCs
8794
      # than WaitForSync to the primary node
8795
      self._ReleaseNodeLock([self.target_node, self.other_node])
8796

    
8797
    # Wait for sync
8798
    # This can fail as the old devices are degraded and _WaitForSync
8799
    # does a combined result over all disks, so we don't check its return value
8800
    self.lu.LogStep(cstep, steps_total, "Sync devices")
8801
    cstep += 1
8802
    _WaitForSync(self.lu, self.instance)
8803

    
8804
    # Check all devices manually
8805
    self._CheckDevices(self.instance.primary_node, iv_names)
8806

    
8807
    # Step: remove old storage
8808
    if not self.early_release:
8809
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
8810
      cstep += 1
8811
      self._RemoveOldStorage(self.target_node, iv_names)
8812

    
8813
  def _ExecDrbd8Secondary(self, feedback_fn):
8814
    """Replace the secondary node for DRBD 8.
8815

8816
    The algorithm for replace is quite complicated:
8817
      - for all disks of the instance:
8818
        - create new LVs on the new node with same names
8819
        - shutdown the drbd device on the old secondary
8820
        - disconnect the drbd network on the primary
8821
        - create the drbd device on the new secondary
8822
        - network attach the drbd on the primary, using an artifice:
8823
          the drbd code for Attach() will connect to the network if it
8824
          finds a device which is connected to the good local disks but
8825
          not network enabled
8826
      - wait for sync across all devices
8827
      - remove all disks from the old secondary
8828

8829
    Failures are not very well handled.
8830

8831
    """
8832
    steps_total = 6
8833

    
8834
    # Step: check device activation
8835
    self.lu.LogStep(1, steps_total, "Check device existence")
8836
    self._CheckDisksExistence([self.instance.primary_node])
8837
    self._CheckVolumeGroup([self.instance.primary_node])
8838

    
8839
    # Step: check other node consistency
8840
    self.lu.LogStep(2, steps_total, "Check peer consistency")
8841
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
8842

    
8843
    # Step: create new storage
8844
    self.lu.LogStep(3, steps_total, "Allocate new storage")
8845
    for idx, dev in enumerate(self.instance.disks):
8846
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8847
                      (self.new_node, idx))
8848
      # we pass force_create=True to force LVM creation
8849
      for new_lv in dev.children:
8850
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8851
                        _GetInstanceInfoText(self.instance), False)
8852

    
8853
    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8903
                           "node: %s" % (idx, msg),
8904
                           hint=("Please cleanup this device manually as"
8905
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


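# Illustrative sketch (not called by TLReplaceDisks): the "detach + rename*2
# + attach" step above swaps logical volumes purely by renaming: the old LVs
# are first moved out of the way under a timestamped suffix, then the freshly
# created LVs take over the old names.  The helper below shows that ordering
# on plain strings; the function name and the "_replaced-" suffix layout only
# mirror the code above and are not meant to be used elsewhere.
def _ExampleLvRenamePlan(old_names, new_names, timestamp):
  """Returns the two rename lists (old->temp, new->old) in apply order.

  """
  temp_names = ["%s_replaced-%s" % (name, timestamp) for name in old_names]
  # First free the old names, then let the new LVs take them over
  rename_old_to_temp = zip(old_names, temp_names)
  rename_new_to_old = zip(new_names, old_names)
  return rename_old_to_temp, rename_new_to_old

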
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


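# Illustrative sketch (independent of the constants module): CheckArguments in
# LURepairNodeStorage above rejects the repair early unless the storage type
# advertises the "fix consistency" operation in VALID_STORAGE_OPERATIONS.  The
# snippet below reproduces that capability lookup with a plain dictionary; the
# helper name and any table contents passed to it are invented for this
# example and do not mirror the real constants.
def _ExampleStorageSupports(operations_by_type, storage_type, operation):
  """Returns whether a storage type supports a given operation (sketch).

  """
  # Unknown storage types simply support nothing, hence the default of []
  return operation in operations_by_type.get(storage_type, [])

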
class LUNodeEvacStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


9079
  """Grow a disk of an instance.
9080

9081
  """
9082
  HPATH = "disk-grow"
9083
  HTYPE = constants.HTYPE_INSTANCE
9084
  REQ_BGL = False
9085

    
9086
  def ExpandNames(self):
9087
    self._ExpandAndLockInstance()
9088
    self.needed_locks[locking.LEVEL_NODE] = []
9089
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9090

    
9091
  def DeclareLocks(self, level):
9092
    if level == locking.LEVEL_NODE:
9093
      self._LockInstancesNodes()
9094

    
9095
  def BuildHooksEnv(self):
9096
    """Build hooks env.
9097

9098
    This runs on the master, the primary and all the secondaries.
9099

9100
    """
9101
    env = {
9102
      "DISK": self.op.disk,
9103
      "AMOUNT": self.op.amount,
9104
      }
9105
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9106
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9107
    return env, nl, nl
9108

    
9109
  def CheckPrereq(self):
9110
    """Check prerequisites.
9111

9112
    This checks that the instance is in the cluster.
9113

9114
    """
9115
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9116
    assert instance is not None, \
9117
      "Cannot retrieve locked instance %s" % self.op.instance_name
9118
    nodenames = list(instance.all_nodes)
9119
    for node in nodenames:
9120
      _CheckNodeOnline(self, node)
9121

    
9122
    self.instance = instance
9123

    
9124
    if instance.disk_template not in constants.DTS_GROWABLE:
9125
      raise errors.OpPrereqError("Instance's disk layout does not support"
9126
                                 " growing.", errors.ECODE_INVAL)
9127

    
9128
    self.disk = instance.FindDisk(self.op.disk)
9129

    
9130
    if instance.disk_template not in (constants.DT_FILE,
9131
                                      constants.DT_SHARED_FILE):
9132
      # TODO: check the free disk space for file, when that feature will be
9133
      # supported
9134
      _CheckNodesFreeDiskPerVG(self, nodenames,
9135
                               self.disk.ComputeGrowth(self.op.amount))
9136

    
9137
  def Exec(self, feedback_fn):
9138
    """Execute disk grow.
9139

9140
    """
9141
    instance = self.instance
9142
    disk = self.disk
9143

    
9144
    disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9145
    if not disks_ok:
9146
      raise errors.OpExecError("Cannot activate block device to grow")
9147

    
9148
    for node in instance.all_nodes:
9149
      self.cfg.SetDiskID(disk, node)
9150
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9151
      result.Raise("Grow request failed to node %s" % node)
9152

    
9153
      # TODO: Rewrite code to work properly
9154
      # DRBD goes into sync mode for a short amount of time after executing the
9155
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9156
      # calling "resize" in sync mode fails. Sleeping for a short amount of
9157
      # time is a work-around.
9158
      time.sleep(5)
9159

    
9160
    disk.RecordGrow(self.op.amount)
9161
    self.cfg.Update(instance, feedback_fn)
9162
    if self.op.wait_for_sync:
9163
      disk_abort = not _WaitForSync(self, instance, disks=[disk])
9164
      if disk_abort:
9165
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9166
                             " status.\nPlease check the instance.")
9167
      if not instance.admin_up:
9168
        _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9169
    elif not instance.admin_up:
9170
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
9171
                           " not supposed to be running because no wait for"
9172
                           " sync mode was requested.")
9173

    
9174

    
9175
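# Illustrative sketch (not used by the LU above): the tail of
# LUInstanceGrowDisk.Exec decides what to do after the grow based on the
# wait_for_sync flag and whether the instance is administratively up.  The
# helper spells out that decision table; the returned action strings are
# invented for this example.
def _ExamplePostGrowAction(wait_for_sync, admin_up):
  """Returns the follow-up action after growing a disk (sketch).

  """
  if wait_for_sync:
    if admin_up:
      return "wait-for-sync"
    # disks were only activated for the resize, so deactivate them again
    return "wait-for-sync-then-shutdown-disks"
  if not admin_up:
    return "warn-disks-left-active"
  return "nothing"

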
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


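# Illustrative sketch (plain dictionaries instead of objects.Disk):
# _ComputeDiskStatus above descends recursively into dev.children, so the
# per-instance "disks" entry mirrors the block-device tree (for example a
# DRBD disk with its two LV children).  The helper shows the same recursive
# shape on a toy tree; the key names are chosen for this example only.
def _ExampleDiskTreeStatus(dev):
  """Returns a nested status dict for a toy disk tree (sketch).

  """
  return {
    "name": dev["name"],
    "size": dev["size"],
    "children": [_ExampleDiskTreeStatus(child)
                 for child in dev.get("children", [])],
    }

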
class LUInstanceSetParams(LogicalUnit):
  """Modifies an instances's parameters.
9331

9332
  """
9333
  HPATH = "instance-modify"
9334
  HTYPE = constants.HTYPE_INSTANCE
9335
  REQ_BGL = False
9336

    
9337
  def CheckArguments(self):
9338
    if not (self.op.nics or self.op.disks or self.op.disk_template or
9339
            self.op.hvparams or self.op.beparams or self.op.os_name):
9340
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9341

    
9342
    if self.op.hvparams:
9343
      _CheckGlobalHvParams(self.op.hvparams)
9344

    
9345
    # Disk validation
9346
    disk_addremove = 0
9347
    for disk_op, disk_dict in self.op.disks:
9348
      utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9349
      if disk_op == constants.DDM_REMOVE:
9350
        disk_addremove += 1
9351
        continue
9352
      elif disk_op == constants.DDM_ADD:
9353
        disk_addremove += 1
9354
      else:
9355
        if not isinstance(disk_op, int):
9356
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9357
        if not isinstance(disk_dict, dict):
9358
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9359
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9360

    
9361
      if disk_op == constants.DDM_ADD:
9362
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9363
        if mode not in constants.DISK_ACCESS_SET:
9364
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9365
                                     errors.ECODE_INVAL)
9366
        size = disk_dict.get('size', None)
9367
        if size is None:
9368
          raise errors.OpPrereqError("Required disk parameter size missing",
9369
                                     errors.ECODE_INVAL)
9370
        try:
9371
          size = int(size)
9372
        except (TypeError, ValueError), err:
9373
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9374
                                     str(err), errors.ECODE_INVAL)
9375
        disk_dict['size'] = size
9376
      else:
9377
        # modification of disk
9378
        if 'size' in disk_dict:
9379
          raise errors.OpPrereqError("Disk size change not possible, use"
9380
                                     " grow-disk", errors.ECODE_INVAL)
9381

    
9382
    if disk_addremove > 1:
9383
      raise errors.OpPrereqError("Only one disk add or remove operation"
9384
                                 " supported at a time", errors.ECODE_INVAL)
9385

    
9386
    if self.op.disks and self.op.disk_template is not None:
9387
      raise errors.OpPrereqError("Disk template conversion and other disk"
9388
                                 " changes not supported at the same time",
9389
                                 errors.ECODE_INVAL)
9390

    
9391
    if (self.op.disk_template and
9392
        self.op.disk_template in constants.DTS_INT_MIRROR and
9393
        self.op.remote_node is None):
9394
      raise errors.OpPrereqError("Changing the disk template to a mirrored"
9395
                                 " one requires specifying a secondary node",
9396
                                 errors.ECODE_INVAL)
9397

    
9398
    # NIC validation
9399
    nic_addremove = 0
9400
    for nic_op, nic_dict in self.op.nics:
9401
      utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9402
      if nic_op == constants.DDM_REMOVE:
9403
        nic_addremove += 1
9404
        continue
9405
      elif nic_op == constants.DDM_ADD:
9406
        nic_addremove += 1
9407
      else:
9408
        if not isinstance(nic_op, int):
9409
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9410
        if not isinstance(nic_dict, dict):
9411
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9412
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9413

    
9414
      # nic_dict should be a dict
9415
      nic_ip = nic_dict.get('ip', None)
9416
      if nic_ip is not None:
9417
        if nic_ip.lower() == constants.VALUE_NONE:
9418
          nic_dict['ip'] = None
9419
        else:
9420
          if not netutils.IPAddress.IsValid(nic_ip):
9421
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9422
                                       errors.ECODE_INVAL)
9423

    
9424
      nic_bridge = nic_dict.get('bridge', None)
9425
      nic_link = nic_dict.get('link', None)
9426
      if nic_bridge and nic_link:
9427
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9428
                                   " at the same time", errors.ECODE_INVAL)
9429
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9430
        nic_dict['bridge'] = None
9431
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9432
        nic_dict['link'] = None
9433

    
9434
      if nic_op == constants.DDM_ADD:
9435
        nic_mac = nic_dict.get('mac', None)
9436
        if nic_mac is None:
9437
          nic_dict['mac'] = constants.VALUE_AUTO
9438

    
9439
      if 'mac' in nic_dict:
9440
        nic_mac = nic_dict['mac']
9441
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9442
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9443

    
9444
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9445
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9446
                                     " modifying an existing nic",
9447
                                     errors.ECODE_INVAL)
9448

    
9449
    if nic_addremove > 1:
9450
      raise errors.OpPrereqError("Only one NIC add or remove operation"
9451
                                 " supported at a time", errors.ECODE_INVAL)
9452

    
9453
  def ExpandNames(self):
9454
    self._ExpandAndLockInstance()
9455
    self.needed_locks[locking.LEVEL_NODE] = []
9456
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9457

    
9458
  def DeclareLocks(self, level):
9459
    if level == locking.LEVEL_NODE:
9460
      self._LockInstancesNodes()
9461
      if self.op.disk_template and self.op.remote_node:
9462
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9463
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9464

    
9465
  def BuildHooksEnv(self):
9466
    """Build hooks env.
9467

9468
    This runs on the master, primary and secondaries.
9469

9470
    """
9471
    args = dict()
9472
    if constants.BE_MEMORY in self.be_new:
9473
      args['memory'] = self.be_new[constants.BE_MEMORY]
9474
    if constants.BE_VCPUS in self.be_new:
9475
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
9476
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9477
    # information at all.
9478
    if self.op.nics:
9479
      args['nics'] = []
9480
      nic_override = dict(self.op.nics)
9481
      for idx, nic in enumerate(self.instance.nics):
9482
        if idx in nic_override:
9483
          this_nic_override = nic_override[idx]
9484
        else:
9485
          this_nic_override = {}
9486
        if 'ip' in this_nic_override:
9487
          ip = this_nic_override['ip']
9488
        else:
9489
          ip = nic.ip
9490
        if 'mac' in this_nic_override:
9491
          mac = this_nic_override['mac']
9492
        else:
9493
          mac = nic.mac
9494
        if idx in self.nic_pnew:
9495
          nicparams = self.nic_pnew[idx]
9496
        else:
9497
          nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9498
        mode = nicparams[constants.NIC_MODE]
9499
        link = nicparams[constants.NIC_LINK]
9500
        args['nics'].append((ip, mac, mode, link))
9501
      if constants.DDM_ADD in nic_override:
9502
        ip = nic_override[constants.DDM_ADD].get('ip', None)
9503
        mac = nic_override[constants.DDM_ADD]['mac']
9504
        nicparams = self.nic_pnew[constants.DDM_ADD]
9505
        mode = nicparams[constants.NIC_MODE]
9506
        link = nicparams[constants.NIC_LINK]
9507
        args['nics'].append((ip, mac, mode, link))
9508
      elif constants.DDM_REMOVE in nic_override:
9509
        del args['nics'][-1]
9510

    
9511
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9512
    if self.op.disk_template:
9513
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9514
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9515
    return env, nl, nl
9516

    
9517
  def CheckPrereq(self):
9518
    """Check prerequisites.
9519

9520
    This only checks the instance list against the existing names.
9521

9522
    """
9523
    # checking the new params on the primary/secondary nodes
9524

    
9525
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9526
    cluster = self.cluster = self.cfg.GetClusterInfo()
9527
    assert self.instance is not None, \
9528
      "Cannot retrieve locked instance %s" % self.op.instance_name
9529
    pnode = instance.primary_node
9530
    nodelist = list(instance.all_nodes)
9531

    
9532
    # OS change
9533
    if self.op.os_name and not self.op.force:
9534
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9535
                      self.op.force_variant)
9536
      instance_os = self.op.os_name
9537
    else:
9538
      instance_os = instance.os
9539

    
9540
    if self.op.disk_template:
9541
      if instance.disk_template == self.op.disk_template:
9542
        raise errors.OpPrereqError("Instance already has disk template %s" %
9543
                                   instance.disk_template, errors.ECODE_INVAL)
9544

    
9545
      if (instance.disk_template,
9546
          self.op.disk_template) not in self._DISK_CONVERSIONS:
9547
        raise errors.OpPrereqError("Unsupported disk template conversion from"
9548
                                   " %s to %s" % (instance.disk_template,
9549
                                                  self.op.disk_template),
9550
                                   errors.ECODE_INVAL)
9551
      _CheckInstanceDown(self, instance, "cannot change disk template")
9552
      if self.op.disk_template in constants.DTS_INT_MIRROR:
9553
        if self.op.remote_node == pnode:
9554
          raise errors.OpPrereqError("Given new secondary node %s is the same"
9555
                                     " as the primary node of the instance" %
9556
                                     self.op.remote_node, errors.ECODE_STATE)
9557
        _CheckNodeOnline(self, self.op.remote_node)
9558
        _CheckNodeNotDrained(self, self.op.remote_node)
9559
        # FIXME: here we assume that the old instance type is DT_PLAIN
9560
        assert instance.disk_template == constants.DT_PLAIN
9561
        disks = [{"size": d.size, "vg": d.logical_id[0]}
9562
                 for d in instance.disks]
9563
        required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9564
        _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9565

    
9566
    # hvparams processing
9567
    if self.op.hvparams:
9568
      hv_type = instance.hypervisor
9569
      i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9570
      utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9571
      hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9572

    
9573
      # local check
9574
      hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9575
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9576
      self.hv_new = hv_new # the new actual values
9577
      self.hv_inst = i_hvdict # the new dict (without defaults)
9578
    else:
9579
      self.hv_new = self.hv_inst = {}
9580

    
9581
    # beparams processing
9582
    if self.op.beparams:
9583
      i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9584
                                   use_none=True)
9585
      utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9586
      be_new = cluster.SimpleFillBE(i_bedict)
9587
      self.be_new = be_new # the new actual values
9588
      self.be_inst = i_bedict # the new dict (without defaults)
9589
    else:
9590
      self.be_new = self.be_inst = {}
9591

    
9592
    # osparams processing
9593
    if self.op.osparams:
9594
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9595
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9596
      self.os_inst = i_osdict # the new dict (without defaults)
9597
    else:
9598
      self.os_inst = {}
9599

    
9600
    self.warn = []
9601

    
9602
    if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9603
      mem_check_list = [pnode]
9604
      if be_new[constants.BE_AUTO_BALANCE]:
9605
        # either we changed auto_balance to yes or it was from before
9606
        mem_check_list.extend(instance.secondary_nodes)
9607
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
9608
                                                  instance.hypervisor)
9609
      nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9610
                                         instance.hypervisor)
9611
      pninfo = nodeinfo[pnode]
9612
      msg = pninfo.fail_msg
9613
      if msg:
9614
        # Assume the primary node is unreachable and go ahead
9615
        self.warn.append("Can't get info from primary node %s: %s" %
9616
                         (pnode,  msg))
9617
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
9618
        self.warn.append("Node data from primary node %s doesn't contain"
9619
                         " free memory information" % pnode)
9620
      elif instance_info.fail_msg:
9621
        self.warn.append("Can't get instance runtime information: %s" %
9622
                        instance_info.fail_msg)
9623
      else:
9624
        if instance_info.payload:
9625
          current_mem = int(instance_info.payload['memory'])
9626
        else:
9627
          # Assume instance not running
9628
          # (there is a slight race condition here, but it's not very probable,
9629
          # and we have no other way to check)
9630
          current_mem = 0
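        # Shortfall = requested memory, minus what the instance currently
        # uses, minus what the primary node reports as free.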
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9632
                    pninfo.payload['memory_free'])
9633
        if miss_mem > 0:
9634
          raise errors.OpPrereqError("This change will prevent the instance"
9635
                                     " from starting, due to %d MB of memory"
9636
                                     " missing on its primary node" % miss_mem,
9637
                                     errors.ECODE_NORES)
9638

    
9639
      if be_new[constants.BE_AUTO_BALANCE]:
9640
        for node, nres in nodeinfo.items():
9641
          if node not in instance.secondary_nodes:
9642
            continue
9643
          msg = nres.fail_msg
9644
          if msg:
9645
            self.warn.append("Can't get info from secondary node %s: %s" %
9646
                             (node, msg))
9647
          elif not isinstance(nres.payload.get('memory_free', None), int):
9648
            self.warn.append("Secondary node %s didn't return free"
9649
                             " memory information" % node)
9650
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9651
            self.warn.append("Not enough memory to failover instance to"
9652
                             " secondary node %s" % node)
9653

    
9654
    # NIC processing
9655
    self.nic_pnew = {}
9656
    self.nic_pinst = {}
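    # self.op.nics is a list of (nic_op, nic_dict) pairs; nic_op is either
    # DDM_ADD, DDM_REMOVE or the index of an existing NIC to modify.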
    for nic_op, nic_dict in self.op.nics:
9658
      if nic_op == constants.DDM_REMOVE:
9659
        if not instance.nics:
9660
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9661
                                     errors.ECODE_INVAL)
9662
        continue
9663
      if nic_op != constants.DDM_ADD:
9664
        # an existing nic
9665
        if not instance.nics:
9666
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9667
                                     " no NICs" % nic_op,
9668
                                     errors.ECODE_INVAL)
9669
        if nic_op < 0 or nic_op >= len(instance.nics):
9670
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9671
                                     " are 0 to %d" %
9672
                                     (nic_op, len(instance.nics) - 1),
9673
                                     errors.ECODE_INVAL)
9674
        old_nic_params = instance.nics[nic_op].nicparams
9675
        old_nic_ip = instance.nics[nic_op].ip
9676
      else:
9677
        old_nic_params = {}
9678
        old_nic_ip = None
9679

    
9680
      update_params_dict = dict([(key, nic_dict[key])
9681
                                 for key in constants.NICS_PARAMETERS
9682
                                 if key in nic_dict])
9683

    
9684
      if 'bridge' in nic_dict:
9685
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9686

    
9687
      new_nic_params = _GetUpdatedParams(old_nic_params,
9688
                                         update_params_dict)
9689
      utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9690
      new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9691
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9692
      self.nic_pinst[nic_op] = new_nic_params
9693
      self.nic_pnew[nic_op] = new_filled_nic_params
9694
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9695

    
9696
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
9697
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9698
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9699
        if msg:
9700
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9701
          if self.op.force:
9702
            self.warn.append(msg)
9703
          else:
9704
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9705
      if new_nic_mode == constants.NIC_MODE_ROUTED:
9706
        if 'ip' in nic_dict:
9707
          nic_ip = nic_dict['ip']
9708
        else:
9709
          nic_ip = old_nic_ip
9710
        if nic_ip is None:
9711
          raise errors.OpPrereqError('Cannot set the nic ip to None'
9712
                                     ' on a routed nic', errors.ECODE_INVAL)
9713
      if 'mac' in nic_dict:
9714
        nic_mac = nic_dict['mac']
9715
        if nic_mac is None:
9716
          raise errors.OpPrereqError('Cannot set the nic mac to None',
9717
                                     errors.ECODE_INVAL)
9718
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9719
          # otherwise generate the mac
9720
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9721
        else:
9722
          # or validate/reserve the current one
9723
          try:
9724
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9725
          except errors.ReservationError:
9726
            raise errors.OpPrereqError("MAC address %s already in use"
9727
                                       " in cluster" % nic_mac,
9728
                                       errors.ECODE_NOTUNIQUE)
9729

    
9730
    # DISK processing
9731
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9732
      raise errors.OpPrereqError("Disk operations not supported for"
9733
                                 " diskless instances",
9734
                                 errors.ECODE_INVAL)
9735
    for disk_op, _ in self.op.disks:
9736
      if disk_op == constants.DDM_REMOVE:
9737
        if len(instance.disks) == 1:
9738
          raise errors.OpPrereqError("Cannot remove the last disk of"
9739
                                     " an instance", errors.ECODE_INVAL)
9740
        _CheckInstanceDown(self, instance, "cannot remove disks")
9741

    
9742
      if (disk_op == constants.DDM_ADD and
9743
          len(instance.disks) >= constants.MAX_DISKS):
9744
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9745
                                   " add more" % constants.MAX_DISKS,
9746
                                   errors.ECODE_STATE)
9747
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9748
        # an existing disk
9749
        if disk_op < 0 or disk_op >= len(instance.disks):
9750
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
9751
                                     " are 0 to %d" %
9752
                                     (disk_op, len(instance.disks) - 1),
9753
                                     errors.ECODE_INVAL)
9754

    
9755
    return
9756

    
9757
  def _ConvertPlainToDrbd(self, feedback_fn):
9758
    """Converts an instance from plain to drbd.
9759

9760
    """
9761
    feedback_fn("Converting template to drbd")
9762
    instance = self.instance
9763
    pnode = instance.primary_node
9764
    snode = self.op.remote_node
9765

    
9766
    # create a fake disk info for _GenerateDiskTemplate
9767
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9768
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9769
                                      instance.name, pnode, [snode],
9770
                                      disk_info, None, None, 0, feedback_fn)
9771
    info = _GetInstanceInfoText(instance)
9772
    feedback_fn("Creating aditional volumes...")
9773
    # first, create the missing data and meta devices
9774
    for disk in new_disks:
9775
      # unfortunately this is... not too nice
9776
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9777
                            info, True)
9778
      for child in disk.children:
9779
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
9780
    # at this stage, all new LVs have been created, we can rename the
9781
    # old ones
9782
    feedback_fn("Renaming original volumes...")
9783
    rename_list = [(o, n.children[0].logical_id)
9784
                   for (o, n) in zip(instance.disks, new_disks)]
9785
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
9786
    result.Raise("Failed to rename original LVs")
9787

    
9788
    feedback_fn("Initializing DRBD devices...")
9789
    # all child devices are in place, we can now create the DRBD devices
9790
    for disk in new_disks:
9791
      for node in [pnode, snode]:
9792
        f_create = node == pnode
9793
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9794

    
9795
    # at this point, the instance has been modified
9796
    instance.disk_template = constants.DT_DRBD8
9797
    instance.disks = new_disks
9798
    self.cfg.Update(instance, feedback_fn)
9799

    
9800
    # disks are created, waiting for sync
9801
    disk_abort = not _WaitForSync(self, instance)
9802
    if disk_abort:
9803
      raise errors.OpExecError("There are some degraded disks for"
9804
                               " this instance, please cleanup manually")
9805

    
9806
  def _ConvertDrbdToPlain(self, feedback_fn):
9807
    """Converts an instance from drbd to plain.
9808

9809
    """
9810
    instance = self.instance
9811
    assert len(instance.secondary_nodes) == 1
9812
    pnode = instance.primary_node
9813
    snode = instance.secondary_nodes[0]
9814
    feedback_fn("Converting template to plain")
9815

    
9816
    old_disks = instance.disks
9817
    new_disks = [d.children[0] for d in old_disks]
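    # For DRBD8 disks children[0] is the local data LV (children[1] is the
    # metadata LV, removed further down); the data LV becomes the plain disk.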

    
9819
    # copy over size and mode
9820
    for parent, child in zip(old_disks, new_disks):
9821
      child.size = parent.size
9822
      child.mode = parent.mode
9823

    
9824
    # update instance structure
9825
    instance.disks = new_disks
9826
    instance.disk_template = constants.DT_PLAIN
9827
    self.cfg.Update(instance, feedback_fn)
9828

    
9829
    feedback_fn("Removing volumes on the secondary node...")
9830
    for disk in old_disks:
9831
      self.cfg.SetDiskID(disk, snode)
9832
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9833
      if msg:
9834
        self.LogWarning("Could not remove block device %s on node %s,"
9835
                        " continuing anyway: %s", disk.iv_name, snode, msg)
9836

    
9837
    feedback_fn("Removing unneeded volumes on the primary node...")
9838
    for idx, disk in enumerate(old_disks):
9839
      meta = disk.children[1]
9840
      self.cfg.SetDiskID(meta, pnode)
9841
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9842
      if msg:
9843
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
9844
                        " continuing anyway: %s", idx, pnode, msg)
9845

    
9846
  def Exec(self, feedback_fn):
9847
    """Modifies an instance.
9848

9849
    All parameters take effect only at the next restart of the instance.
9850

9851
    """
9852
    # Process here the warnings from CheckPrereq, as we don't have a
9853
    # feedback_fn there.
9854
    for warn in self.warn:
9855
      feedback_fn("WARNING: %s" % warn)
9856

    
9857
    result = []
9858
    instance = self.instance
9859
    # disk changes
9860
    for disk_op, disk_dict in self.op.disks:
9861
      if disk_op == constants.DDM_REMOVE:
9862
        # remove the last disk
9863
        device = instance.disks.pop()
9864
        device_idx = len(instance.disks)
9865
        for node, disk in device.ComputeNodeTree(instance.primary_node):
9866
          self.cfg.SetDiskID(disk, node)
9867
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9868
          if msg:
9869
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
9870
                            " continuing anyway", device_idx, node, msg)
9871
        result.append(("disk/%d" % device_idx, "remove"))
9872
      elif disk_op == constants.DDM_ADD:
9873
        # add a new disk
9874
        if instance.disk_template in (constants.DT_FILE,
9875
                                        constants.DT_SHARED_FILE):
9876
          file_driver, file_path = instance.disks[0].logical_id
9877
          file_path = os.path.dirname(file_path)
9878
        else:
9879
          file_driver = file_path = None
9880
        disk_idx_base = len(instance.disks)
9881
        new_disk = _GenerateDiskTemplate(self,
9882
                                         instance.disk_template,
9883
                                         instance.name, instance.primary_node,
9884
                                         instance.secondary_nodes,
9885
                                         [disk_dict],
9886
                                         file_path,
9887
                                         file_driver,
9888
                                         disk_idx_base, feedback_fn)[0]
9889
        instance.disks.append(new_disk)
9890
        info = _GetInstanceInfoText(instance)
9891

    
9892
        logging.info("Creating volume %s for instance %s",
9893
                     new_disk.iv_name, instance.name)
9894
        # Note: this needs to be kept in sync with _CreateDisks
9895
        #HARDCODE
9896
        for node in instance.all_nodes:
9897
          f_create = node == instance.primary_node
9898
          try:
9899
            _CreateBlockDev(self, node, instance, new_disk,
9900
                            f_create, info, f_create)
9901
          except errors.OpExecError, err:
9902
            self.LogWarning("Failed to create volume %s (%s) on"
9903
                            " node %s: %s",
9904
                            new_disk.iv_name, new_disk, node, err)
9905
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9906
                       (new_disk.size, new_disk.mode)))
9907
      else:
9908
        # change a given disk
9909
        instance.disks[disk_op].mode = disk_dict['mode']
9910
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9911

    
9912
    if self.op.disk_template:
9913
      r_shut = _ShutdownInstanceDisks(self, instance)
9914
      if not r_shut:
9915
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
9916
                                 " proceed with disk template conversion")
9917
      mode = (instance.disk_template, self.op.disk_template)
9918
      try:
9919
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
9920
      except:
9921
        self.cfg.ReleaseDRBDMinors(instance.name)
9922
        raise
9923
      result.append(("disk_template", self.op.disk_template))
9924

    
9925
    # NIC changes
9926
    for nic_op, nic_dict in self.op.nics:
9927
      if nic_op == constants.DDM_REMOVE:
9928
        # remove the last nic
9929
        del instance.nics[-1]
9930
        result.append(("nic.%d" % len(instance.nics), "remove"))
9931
      elif nic_op == constants.DDM_ADD:
9932
        # mac and bridge should be set by now
9933
        mac = nic_dict['mac']
9934
        ip = nic_dict.get('ip', None)
9935
        nicparams = self.nic_pinst[constants.DDM_ADD]
9936
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9937
        instance.nics.append(new_nic)
9938
        result.append(("nic.%d" % (len(instance.nics) - 1),
9939
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
9940
                       (new_nic.mac, new_nic.ip,
9941
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9942
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9943
                       )))
9944
      else:
9945
        for key in 'mac', 'ip':
9946
          if key in nic_dict:
9947
            setattr(instance.nics[nic_op], key, nic_dict[key])
9948
        if nic_op in self.nic_pinst:
9949
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9950
        for key, val in nic_dict.iteritems():
9951
          result.append(("nic.%s/%d" % (key, nic_op), val))
9952

    
9953
    # hvparams changes
9954
    if self.op.hvparams:
9955
      instance.hvparams = self.hv_inst
9956
      for key, val in self.op.hvparams.iteritems():
9957
        result.append(("hv/%s" % key, val))
9958

    
9959
    # beparams changes
9960
    if self.op.beparams:
9961
      instance.beparams = self.be_inst
9962
      for key, val in self.op.beparams.iteritems():
9963
        result.append(("be/%s" % key, val))
9964

    
9965
    # OS change
9966
    if self.op.os_name:
9967
      instance.os = self.op.os_name
9968

    
9969
    # osparams changes
9970
    if self.op.osparams:
9971
      instance.osparams = self.os_inst
9972
      for key, val in self.op.osparams.iteritems():
9973
        result.append(("os/%s" % key, val))
9974

    
9975
    self.cfg.Update(instance, feedback_fn)
9976

    
9977
    return result

  _DISK_CONVERSIONS = {
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
    }


class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


class LUBackupPrepare(NoHooksLU):
10022
  """Prepares an instance for an export and returns useful information.
10023

10024
  """
10025
  REQ_BGL = False
10026

    
10027
  def ExpandNames(self):
10028
    self._ExpandAndLockInstance()
10029

    
10030
  def CheckPrereq(self):
10031
    """Check prerequisites.
10032

10033
    """
10034
    instance_name = self.op.instance_name
10035

    
10036
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10037
    assert self.instance is not None, \
10038
          "Cannot retrieve locked instance %s" % self.op.instance_name
10039
    _CheckNodeOnline(self, self.instance.primary_node)
10040

    
10041
    self._cds = _GetClusterDomainSecret()
10042

    
10043
  def Exec(self, feedback_fn):
10044
    """Prepares an instance for an export.
10045

10046
    """
10047
    instance = self.instance
10048

    
10049
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10050
      salt = utils.GenerateSecret(8)
10051

    
10052
      feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10053
      result = self.rpc.call_x509_cert_create(instance.primary_node,
10054
                                              constants.RIE_CERT_VALIDITY)
10055
      result.Raise("Can't create X509 key and certificate on %s" % result.node)
10056

    
10057
      (name, cert_pem) = result.payload
10058

    
10059
      cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10060
                                             cert_pem)
10061

    
10062
      return {
10063
        "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10064
        "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10065
                          salt),
10066
        "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10067
        }
10068

    
10069
    return None
10070

    
10071

    
10072
class LUBackupExport(LogicalUnit):
10073
  """Export an instance to an image in the cluster.
10074

10075
  """
10076
  HPATH = "instance-export"
10077
  HTYPE = constants.HTYPE_INSTANCE
10078
  REQ_BGL = False
10079

    
10080
  def CheckArguments(self):
10081
    """Check the arguments.
10082

10083
    """
10084
    self.x509_key_name = self.op.x509_key_name
10085
    self.dest_x509_ca_pem = self.op.destination_x509_ca
10086

    
10087
    if self.op.mode == constants.EXPORT_MODE_REMOTE:
10088
      if not self.x509_key_name:
10089
        raise errors.OpPrereqError("Missing X509 key name for encryption",
10090
                                   errors.ECODE_INVAL)
10091

    
10092
      if not self.dest_x509_ca_pem:
10093
        raise errors.OpPrereqError("Missing destination X509 CA",
10094
                                   errors.ECODE_INVAL)
10095

    
10096
  def ExpandNames(self):
10097
    self._ExpandAndLockInstance()
10098

    
10099
    # Lock all nodes for local exports
10100
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10101
      # FIXME: lock only instance primary and destination node
10102
      #
10103
      # Sad but true, for now we have to lock all nodes, as we don't know where
10104
      # the previous export might be, and in this LU we search for it and
10105
      # remove it from its current node. In the future we could fix this by:
10106
      #  - making a tasklet to search (share-lock all), then create the
10107
      #    new one, then one to remove, after
10108
      #  - removing the removal operation altogether
10109
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10110

    
10111
  def DeclareLocks(self, level):
10112
    """Last minute lock declaration."""
10113
    # All nodes are locked anyway, so nothing to do here.
10114

    
10115
  def BuildHooksEnv(self):
10116
    """Build hooks env.
10117

10118
    This will run on the master, primary node and target node.
10119

10120
    """
10121
    env = {
10122
      "EXPORT_MODE": self.op.mode,
10123
      "EXPORT_NODE": self.op.target_node,
10124
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10125
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10126
      # TODO: Generic function for boolean env variables
10127
      "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10128
      }
10129

    
10130
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10131

    
10132
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10133

    
10134
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10135
      nl.append(self.op.target_node)
10136

    
10137
    return env, nl, nl
10138

    
10139
  def CheckPrereq(self):
10140
    """Check prerequisites.
10141

10142
    This checks that the instance and node names are valid.
10143

10144
    """
10145
    instance_name = self.op.instance_name
10146

    
10147
    self.instance = self.cfg.GetInstanceInfo(instance_name)
10148
    assert self.instance is not None, \
10149
          "Cannot retrieve locked instance %s" % self.op.instance_name
10150
    _CheckNodeOnline(self, self.instance.primary_node)
10151

    
10152
    if (self.op.remove_instance and self.instance.admin_up and
10153
        not self.op.shutdown):
10154
      raise errors.OpPrereqError("Can not remove instance without shutting it"
10155
                                 " down before")
10156

    
10157
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10158
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10159
      self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10160
      assert self.dst_node is not None
10161

    
10162
      _CheckNodeOnline(self, self.dst_node.name)
10163
      _CheckNodeNotDrained(self, self.dst_node.name)
10164

    
10165
      self._cds = None
10166
      self.dest_disk_info = None
10167
      self.dest_x509_ca = None
10168

    
10169
    elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10170
      self.dst_node = None
10171

    
10172
      if len(self.op.target_node) != len(self.instance.disks):
10173
        raise errors.OpPrereqError(("Received destination information for %s"
10174
                                    " disks, but instance %s has %s disks") %
10175
                                   (len(self.op.target_node), instance_name,
10176
                                    len(self.instance.disks)),
10177
                                   errors.ECODE_INVAL)
10178

    
10179
      cds = _GetClusterDomainSecret()
10180

    
10181
      # Check X509 key name
10182
      try:
10183
        (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10184
      except (TypeError, ValueError), err:
10185
        raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10186

    
10187
      if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10188
        raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10189
                                   errors.ECODE_INVAL)
10190

    
10191
      # Load and verify CA
10192
      try:
10193
        (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10194
      except OpenSSL.crypto.Error, err:
10195
        raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10196
                                   (err, ), errors.ECODE_INVAL)
10197

    
10198
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10199
      if errcode is not None:
10200
        raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10201
                                   (msg, ), errors.ECODE_INVAL)
10202

    
10203
      self.dest_x509_ca = cert
10204

    
10205
      # Verify target information
10206
      disk_info = []
10207
      for idx, disk_data in enumerate(self.op.target_node):
10208
        try:
10209
          (host, port, magic) = \
10210
            masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10211
        except errors.GenericError, err:
10212
          raise errors.OpPrereqError("Target info for disk %s: %s" %
10213
                                     (idx, err), errors.ECODE_INVAL)
10214

    
10215
        disk_info.append((host, port, magic))
10216

    
10217
      assert len(disk_info) == len(self.op.target_node)
10218
      self.dest_disk_info = disk_info
10219

    
10220
    else:
10221
      raise errors.ProgrammerError("Unhandled export mode %r" %
10222
                                   self.op.mode)
10223

    
10224
    # instance disk type verification
10225
    # TODO: Implement export support for file-based disks
10226
    for disk in self.instance.disks:
10227
      if disk.dev_type == constants.LD_FILE:
10228
        raise errors.OpPrereqError("Export not supported for instances with"
10229
                                   " file-based disks", errors.ECODE_INVAL)
10230

    
10231
  def _CleanupExports(self, feedback_fn):
10232
    """Removes exports of current instance from all other nodes.
10233

10234
    If an instance in a cluster with nodes A..D was exported to node C, its
10235
    exports will be removed from the nodes A, B and D.
10236

10237
    """
10238
    assert self.op.mode != constants.EXPORT_MODE_REMOTE
10239

    
10240
    nodelist = self.cfg.GetNodeList()
10241
    nodelist.remove(self.dst_node.name)
10242

    
10243
    # on one-node clusters nodelist will be empty after the removal
10244
    # if we proceed the backup would be removed because OpBackupQuery
10245
    # substitutes an empty list with the full cluster node list.
10246
    iname = self.instance.name
10247
    if nodelist:
10248
      feedback_fn("Removing old exports for instance %s" % iname)
10249
      exportlist = self.rpc.call_export_list(nodelist)
10250
      for node in exportlist:
10251
        if exportlist[node].fail_msg:
10252
          continue
10253
        if iname in exportlist[node].payload:
10254
          msg = self.rpc.call_export_remove(node, iname).fail_msg
10255
          if msg:
10256
            self.LogWarning("Could not remove older export for instance %s"
10257
                            " on node %s: %s", iname, node, msg)
10258

    
10259
  def Exec(self, feedback_fn):
10260
    """Export an instance to an image in the cluster.
10261

10262
    """
10263
    assert self.op.mode in constants.EXPORT_MODES
10264

    
10265
    instance = self.instance
10266
    src_node = instance.primary_node
10267

    
10268
    if self.op.shutdown:
10269
      # shutdown the instance, but not the disks
10270
      feedback_fn("Shutting down instance %s" % instance.name)
10271
      result = self.rpc.call_instance_shutdown(src_node, instance,
10272
                                               self.op.shutdown_timeout)
10273
      # TODO: Maybe ignore failures if ignore_remove_failures is set
10274
      result.Raise("Could not shutdown instance %s on"
10275
                   " node %s" % (instance.name, src_node))
10276

    
10277
    # set the disks ID correctly since call_instance_start needs the
10278
    # correct drbd minor to create the symlinks
10279
    for disk in instance.disks:
10280
      self.cfg.SetDiskID(disk, src_node)
10281

    
10282
    activate_disks = (not instance.admin_up)
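    # A stopped instance has inactive disks; they are activated just below for
    # the export and shut down again in the outer "finally" clause.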

    
10284
    if activate_disks:
10285
      # Activate the instance disks if we're exporting a stopped instance
10286
      feedback_fn("Activating disks for %s" % instance.name)
10287
      _StartInstanceDisks(self, instance, None)
10288

    
10289
    try:
10290
      helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10291
                                                     instance)
10292

    
10293
      helper.CreateSnapshots()
10294
      try:
10295
        if (self.op.shutdown and instance.admin_up and
10296
            not self.op.remove_instance):
10297
          assert not activate_disks
10298
          feedback_fn("Starting instance %s" % instance.name)
10299
          result = self.rpc.call_instance_start(src_node, instance, None, None)
10300
          msg = result.fail_msg
10301
          if msg:
10302
            feedback_fn("Failed to start instance: %s" % msg)
10303
            _ShutdownInstanceDisks(self, instance)
10304
            raise errors.OpExecError("Could not start instance: %s" % msg)
10305

    
10306
        if self.op.mode == constants.EXPORT_MODE_LOCAL:
10307
          (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10308
        elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10309
          connect_timeout = constants.RIE_CONNECT_TIMEOUT
10310
          timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10311

    
10312
          (key_name, _, _) = self.x509_key_name
10313

    
10314
          dest_ca_pem = \
10315
            OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10316
                                            self.dest_x509_ca)
10317

    
10318
          (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10319
                                                     key_name, dest_ca_pem,
10320
                                                     timeouts)
10321
      finally:
10322
        helper.Cleanup()
10323

    
10324
      # Check for backwards compatibility
10325
      assert len(dresults) == len(instance.disks)
10326
      assert compat.all(isinstance(i, bool) for i in dresults), \
10327
             "Not all results are boolean: %r" % dresults
10328

    
10329
    finally:
10330
      if activate_disks:
10331
        feedback_fn("Deactivating disks for %s" % instance.name)
10332
        _ShutdownInstanceDisks(self, instance)
10333

    
10334
    if not (compat.all(dresults) and fin_resu):
10335
      failures = []
10336
      if not fin_resu:
10337
        failures.append("export finalization")
10338
      if not compat.all(dresults):
10339
        fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10340
                               if not dsk)
10341
        failures.append("disk export: disk(s) %s" % fdsk)
10342

    
10343
      raise errors.OpExecError("Export failed, errors in %s" %
10344
                               utils.CommaJoin(failures))
10345

    
10346
    # At this point, the export was successful, we can cleanup/finish
10347

    
10348
    # Remove instance if requested
10349
    if self.op.remove_instance:
10350
      feedback_fn("Removing instance %s" % instance.name)
10351
      _RemoveInstance(self, feedback_fn, instance,
10352
                      self.op.ignore_remove_failures)
10353

    
10354
    if self.op.mode == constants.EXPORT_MODE_LOCAL:
10355
      self._CleanupExports(feedback_fn)
10356

    
10357
    return fin_resu, dresults
10358

    
10359

    
10360
class LUBackupRemove(NoHooksLU):
10361
  """Remove exports related to the named instance.
10362

10363
  """
10364
  REQ_BGL = False
10365

    
10366
  def ExpandNames(self):
10367
    self.needed_locks = {}
10368
    # We need all nodes to be locked in order for RemoveExport to work, but we
10369
    # don't need to lock the instance itself, as nothing will happen to it (and
10370
    # we can remove exports also for a removed instance)
10371
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10372

    
10373
  def Exec(self, feedback_fn):
10374
    """Remove any export.
10375

10376
    """
10377
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10378
    # If the instance was not found we'll try with the name that was passed in.
10379
    # This will only work if it was an FQDN, though.
10380
    fqdn_warn = False
10381
    if not instance_name:
10382
      fqdn_warn = True
10383
      instance_name = self.op.instance_name
10384

    
10385
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10386
    exportlist = self.rpc.call_export_list(locked_nodes)
10387
    found = False
10388
    for node in exportlist:
10389
      msg = exportlist[node].fail_msg
10390
      if msg:
10391
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10392
        continue
10393
      if instance_name in exportlist[node].payload:
10394
        found = True
10395
        result = self.rpc.call_export_remove(node, instance_name)
10396
        msg = result.fail_msg
10397
        if msg:
10398
          logging.error("Could not remove export for instance %s"
10399
                        " on node %s: %s", instance_name, node, msg)
10400

    
10401
    if fqdn_warn and not found:
10402
      feedback_fn("Export not found. If trying to remove an export belonging"
10403
                  " to a deleted instance please use its Fully Qualified"
10404
                  " Domain Name.")
10405

    
10406

    
10407
class LUGroupAdd(LogicalUnit):
10408
  """Logical unit for creating node groups.
10409

10410
  """
10411
  HPATH = "group-add"
10412
  HTYPE = constants.HTYPE_GROUP
10413
  REQ_BGL = False
10414

    
10415
  def ExpandNames(self):
10416
    # We need the new group's UUID here so that we can create and acquire the
10417
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10418
    # that it should not check whether the UUID exists in the configuration.
10419
    self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10420
    self.needed_locks = {}
10421
    self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10422

    
10423
  def CheckPrereq(self):
10424
    """Check prerequisites.
10425

10426
    This checks that the given group name is not an existing node group
10427
    already.
10428

10429
    """
10430
    try:
10431
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10432
    except errors.OpPrereqError:
10433
      pass
10434
    else:
10435
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10436
                                 " node group (UUID: %s)" %
10437
                                 (self.op.group_name, existing_uuid),
10438
                                 errors.ECODE_EXISTS)
10439

    
10440
    if self.op.ndparams:
10441
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10442

    
10443
  def BuildHooksEnv(self):
10444
    """Build hooks env.
10445

10446
    """
10447
    env = {
10448
      "GROUP_NAME": self.op.group_name,
10449
      }
10450
    mn = self.cfg.GetMasterNode()
10451
    return env, [mn], [mn]
10452

    
10453
  def Exec(self, feedback_fn):
10454
    """Add the node group to the cluster.
10455

10456
    """
10457
    group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10458
                                  uuid=self.group_uuid,
10459
                                  alloc_policy=self.op.alloc_policy,
10460
                                  ndparams=self.op.ndparams)
10461

    
10462
    self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10463
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10464

    
10465

    
10466
class LUGroupAssignNodes(NoHooksLU):
10467
  """Logical unit for assigning nodes to groups.
10468

10469
  """
10470
  REQ_BGL = False
10471

    
10472
  def ExpandNames(self):
10473
    # These raise errors.OpPrereqError on their own:
10474
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10475
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10476

    
10477
    # We want to lock all the affected nodes and groups. We have readily
10478
    # available the list of nodes, and the *destination* group. To gather the
10479
    # list of "source" groups, we need to fetch node information.
10480
    self.node_data = self.cfg.GetAllNodesInfo()
10481
    affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10482
    affected_groups.add(self.group_uuid)
10483

    
10484
    self.needed_locks = {
10485
      locking.LEVEL_NODEGROUP: list(affected_groups),
10486
      locking.LEVEL_NODE: self.op.nodes,
10487
      }
10488

    
10489
  def CheckPrereq(self):
10490
    """Check prerequisites.
10491

10492
    """
10493
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10494
    instance_data = self.cfg.GetAllInstancesInfo()
10495

    
10496
    if self.group is None:
10497
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10498
                               (self.op.group_name, self.group_uuid))
10499

    
10500
    (new_splits, previous_splits) = \
10501
      self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10502
                                             for node in self.op.nodes],
10503
                                            self.node_data, instance_data)
10504

    
10505
    if new_splits:
10506
      fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10507

    
10508
      if not self.op.force:
10509
        raise errors.OpExecError("The following instances get split by this"
10510
                                 " change and --force was not given: %s" %
10511
                                 fmt_new_splits)
10512
      else:
10513
        self.LogWarning("This operation will split the following instances: %s",
10514
                        fmt_new_splits)
10515

    
10516
        if previous_splits:
10517
          self.LogWarning("In addition, these already-split instances continue"
10518
                          " to be spit across groups: %s",
10519
                          utils.CommaJoin(utils.NiceSort(previous_splits)))
10520

    
10521
  def Exec(self, feedback_fn):
10522
    """Assign nodes to a new group.
10523

10524
    """
10525
    for node in self.op.nodes:
10526
      self.node_data[node].group = self.group_uuid
10527

    
10528
    self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10529

    
10530
  @staticmethod
10531
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10532
    """Check for split instances after a node assignment.
10533

10534
    This method considers a series of node assignments as an atomic operation,
10535
    and returns information about split instances after applying the set of
10536
    changes.
10537

10538
    In particular, it returns information about newly split instances, and
10539
    instances that were already split, and remain so after the change.
10540

10541
    Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10542
    considered.
10543

10544
    @type changes: list of (node_name, new_group_uuid) pairs.
10545
    @param changes: list of node assignments to consider.
10546
    @param node_data: a dict with data for all nodes
10547
    @param instance_data: a dict with all instances to consider
10548
    @rtype: a two-tuple
10549
    @return: a list of instances that were previously okay and end up split as a
10550
      consequence of this change, and a list of instances that were previously
10551
      split and this change does not fix.
10552

10553
    """
    changed_nodes = dict((node, group) for node, group in changes
10555
                         if node_data[node].group != group)
10556

    
10557
    all_split_instances = set()
10558
    previously_split_instances = set()
10559

    
10560
    def InstanceNodes(instance):
10561
      return [instance.primary_node] + list(instance.secondary_nodes)
10562

    
10563
    for inst in instance_data.values():
10564
      if inst.disk_template not in constants.DTS_INT_MIRROR:
10565
        continue
10566

    
10567
      instance_nodes = InstanceNodes(inst)
10568

    
10569
      if len(set(node_data[node].group for node in instance_nodes)) > 1:
10570
        previously_split_instances.add(inst.name)
10571

    
10572
      if len(set(changed_nodes.get(node, node_data[node].group)
10573
                 for node in instance_nodes)) > 1:
10574
        all_split_instances.add(inst.name)
10575

    
10576
    return (list(all_split_instances - previously_split_instances),
10577
            list(previously_split_instances & all_split_instances))
10578

    
10579

    
10580
class _GroupQuery(_QueryBase):
10581
  FIELDS = query.GROUP_FIELDS
10582

    
10583
  def ExpandNames(self, lu):
10584
    lu.needed_locks = {}
10585

    
10586
    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10587
    name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10588

    
10589
    if not self.names:
10590
      self.wanted = [name_to_uuid[name]
10591
                     for name in utils.NiceSort(name_to_uuid.keys())]
10592
    else:
10593
      # Accept names to be either names or UUIDs.
10594
      missing = []
10595
      self.wanted = []
10596
      all_uuid = frozenset(self._all_groups.keys())
10597

    
10598
      for name in self.names:
10599
        if name in all_uuid:
10600
          self.wanted.append(name)
10601
        elif name in name_to_uuid:
10602
          self.wanted.append(name_to_uuid[name])
10603
        else:
10604
          missing.append(name)
10605

    
10606
      if missing:
10607
        raise errors.OpPrereqError("Some groups do not exist: %s" % missing,
10608
                                   errors.ECODE_NOENT)
10609

    
10610
  def DeclareLocks(self, lu, level):
10611
    pass
10612

    
10613
  def _GetQueryData(self, lu):
10614
    """Computes the list of node groups and their attributes.
10615

10616
    """
10617
    do_nodes = query.GQ_NODE in self.requested_data
10618
    do_instances = query.GQ_INST in self.requested_data
10619

    
10620
    group_to_nodes = None
10621
    group_to_instances = None
10622

    
10623
    # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10624
    # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10625
    # latter GetAllInstancesInfo() is not enough, for we have to go through
10626
    # instance->node. Hence, we will need to process nodes even if we only need
10627
    # instance information.
10628
    if do_nodes or do_instances:
10629
      all_nodes = lu.cfg.GetAllNodesInfo()
10630
      group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10631
      node_to_group = {}
10632

    
10633
      for node in all_nodes.values():
10634
        if node.group in group_to_nodes:
10635
          group_to_nodes[node.group].append(node.name)
10636
          node_to_group[node.name] = node.group
10637

    
10638
      if do_instances:
10639
        all_instances = lu.cfg.GetAllInstancesInfo()
10640
        group_to_instances = dict((uuid, []) for uuid in self.wanted)
10641

    
10642
        for instance in all_instances.values():
10643
          node = instance.primary_node
10644
          if node in node_to_group:
10645
            group_to_instances[node_to_group[node]].append(instance.name)
10646

    
10647
        if not do_nodes:
10648
          # Do not pass on node information if it was not requested.
10649
          group_to_nodes = None
10650

    
10651
    return query.GroupQueryData([self._all_groups[uuid]
10652
                                 for uuid in self.wanted],
10653
                                group_to_nodes, group_to_instances)
10654

    
10655

    
10656
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  """
  REQ_BGL = False

  def CheckArguments(self):
    self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
                          self.op.output_fields, False)

  def ExpandNames(self):
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.gq.OldStyleQuery(self)


class LUGroupSetParams(LogicalUnit):
10674
  """Modifies the parameters of a node group.
10675

10676
  """
10677
  HPATH = "group-modify"
10678
  HTYPE = constants.HTYPE_GROUP
10679
  REQ_BGL = False
10680

    
10681
  def CheckArguments(self):
10682
    all_changes = [
10683
      self.op.ndparams,
10684
      self.op.alloc_policy,
10685
      ]
10686

    
10687
    if all_changes.count(None) == len(all_changes):
10688
      raise errors.OpPrereqError("Please pass at least one modification",
10689
                                 errors.ECODE_INVAL)
10690

    
10691
  def ExpandNames(self):
10692
    # This raises errors.OpPrereqError on its own:
10693
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10694

    
10695
    self.needed_locks = {
10696
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10697
      }
10698

    
10699
  def CheckPrereq(self):
10700
    """Check prerequisites.
10701

10702
    """
10703
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
10704

    
10705
    if self.group is None:
10706
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10707
                               (self.op.group_name, self.group_uuid))
10708

    
10709
    if self.op.ndparams:
10710
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
10711
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10712
      self.new_ndparams = new_ndparams
10713

    
10714
  def BuildHooksEnv(self):
10715
    """Build hooks env.
10716

10717
    """
10718
    env = {
10719
      "GROUP_NAME": self.op.group_name,
10720
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
10721
      }
10722
    mn = self.cfg.GetMasterNode()
10723
    return env, [mn], [mn]
10724

    
10725
  def Exec(self, feedback_fn):
10726
    """Modifies the node group.
10727

10728
    """
10729
    result = []
10730

    
10731
    if self.op.ndparams:
10732
      self.group.ndparams = self.new_ndparams
10733
      result.append(("ndparams", str(self.group.ndparams)))
10734

    
10735
    if self.op.alloc_policy:
10736
      self.group.alloc_policy = self.op.alloc_policy
10737

    
10738
    self.cfg.Update(self.group, feedback_fn)
10739
    return result
10740

    
10741

    
10742

    
10743
class LUGroupRemove(LogicalUnit):
10744
  HPATH = "group-remove"
10745
  HTYPE = constants.HTYPE_GROUP
10746
  REQ_BGL = False
10747

    
10748
  def ExpandNames(self):
10749
    # This raises errors.OpPrereqError on its own:
10750
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10751
    self.needed_locks = {
10752
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10753
      }
10754

    
10755
  def CheckPrereq(self):
10756
    """Check prerequisites.
10757

10758
    This checks that the given group name exists as a node group, that is
10759
    empty (i.e., contains no nodes), and that is not the last group of the
10760
    cluster.
10761

10762
    """
10763
    # Verify that the group is empty.
10764
    group_nodes = [node.name
10765
                   for node in self.cfg.GetAllNodesInfo().values()
10766
                   if node.group == self.group_uuid]
10767

    
10768
    if group_nodes:
10769
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
10770
                                 " nodes: %s" %
10771
                                 (self.op.group_name,
10772
                                  utils.CommaJoin(utils.NiceSort(group_nodes))),
10773
                                 errors.ECODE_STATE)
10774

    
10775
    # Verify the cluster would not be left group-less.
10776
    if len(self.cfg.GetNodeGroupList()) == 1:
10777
      raise errors.OpPrereqError("Group '%s' is the only group,"
10778
                                 " cannot be removed" %
10779
                                 self.op.group_name,
10780
                                 errors.ECODE_STATE)
10781

    
10782
  def BuildHooksEnv(self):
10783
    """Build hooks env.
10784

10785
    """
10786
    env = {
10787
      "GROUP_NAME": self.op.group_name,
10788
      }
10789
    mn = self.cfg.GetMasterNode()
10790
    return env, [mn], [mn]
10791

    
10792
  def Exec(self, feedback_fn):
10793
    """Remove the node group.
10794

10795
    """
10796
    try:
10797
      self.cfg.RemoveNodeGroup(self.group_uuid)
10798
    except errors.ConfigurationError:
10799
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
10800
                               (self.op.group_name, self.group_uuid))
10801

    
10802
    self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10803

    
10804

    
10805
class LUGroupRename(LogicalUnit):
10806
  HPATH = "group-rename"
10807
  HTYPE = constants.HTYPE_GROUP
10808
  REQ_BGL = False
10809

    
10810
  def ExpandNames(self):
10811
    # This raises errors.OpPrereqError on its own:
10812
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10813

    
10814
    self.needed_locks = {
10815
      locking.LEVEL_NODEGROUP: [self.group_uuid],
10816
      }
10817

    
10818
  def CheckPrereq(self):
10819
    """Check prerequisites.
10820

10821
    Ensures requested new name is not yet used.
10822

10823
    """
10824
    try:
10825
      new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
10826
    except errors.OpPrereqError:
10827
      pass
10828
    else:
10829
      raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
10830
                                 " node group (UUID: %s)" %
10831
                                 (self.op.new_name, new_name_uuid),
10832
                                 errors.ECODE_EXISTS)
10833

    
10834
  def BuildHooksEnv(self):
10835
    """Build hooks env.
10836

10837
    """
10838
    env = {
10839
      "OLD_NAME": self.op.group_name,
10840
      "NEW_NAME": self.op.new_name,
10841
      }
10842

    
10843
    mn = self.cfg.GetMasterNode()
10844
    all_nodes = self.cfg.GetAllNodesInfo()
10845
    run_nodes = [mn]
10846
    all_nodes.pop(mn, None)
10847

    
10848
    for node in all_nodes.values():
10849
      if node.group == self.group_uuid:
10850
        run_nodes.append(node.name)
10851

    
10852
    return env, run_nodes, run_nodes
10853

    
10854
  def Exec(self, feedback_fn):
10855
    """Rename the node group.
10856

10857
    """
10858
    group = self.cfg.GetNodeGroup(self.group_uuid)
10859

    
10860
    if group is None:
10861
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10862
                               (self.op.group_name, self.group_uuid))
10863

    
10864
    group.name = self.op.new_name
10865
    self.cfg.Update(group, feedback_fn)
10866

    
10867
    return self.op.new_name
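# A hedged illustration of the hooks data built above: assuming a node group
# being renamed from "rack1" to "rack2" (hypothetical names), BuildHooksEnv
# would produce an environment like
#
#   env = {
#     "OLD_NAME": "rack1",
#     "NEW_NAME": "rack2",
#     }
#
# and, as far as this sketch assumes about the LogicalUnit hooks contract,
# the two node lists returned alongside it (the master node plus every node
# of the renamed group) are the targets for the pre- and post-phase hooks.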


class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)


class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUTagsSearch(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err), errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results
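# A hedged illustration of the search above: for a hypothetical cluster with
# an instance "web1" and a node "node1" both tagged "env:prod", an opcode
# with pattern "^env:" would make Exec return
#
#   [("/instances/web1", "env:prod"),
#    ("/nodes/node1", "env:prod")]
#
# i.e. a list of (path, tag) pairs covering the cluster, all instances and
# all nodes.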


class LUTagsSet(TagsLU):
  """Sets a tag on a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    self.cfg.Update(self.target, feedback_fn)


class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()

    diff_tags = del_tags - cur_tags
    if diff_tags:
      diff_names = ("'%s'" % i for i in sorted(diff_tags))
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (utils.CommaJoin(diff_names), ),
                                 errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    self.cfg.Update(self.target, feedback_fn)


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    """
    if self.op.repeat == 0:
      self._TestDelay()
    else:
      top_value = self.op.repeat - 1
      for i in range(self.op.repeat):
        self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
        self._TestDelay()
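# A hedged usage sketch: based on the opcode attributes read above (duration,
# on_master, on_nodes, repeat), a delay test could be submitted roughly as
#
#   op = opcodes.OpTestDelay(duration=5.0, on_master=True,
#                            on_nodes=[], repeat=3)
#
# The exact opcode definition lives in the opcodes module and may differ in
# detail; with repeat=3 the sleep runs three times, logging the iteration
# counter before each run.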


class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()
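  # A hedged sketch of the client side of the handshake implemented above:
  # the client receives the socket path through the job log entry emitted by
  # _SendNotification and then, under these assumptions, does roughly
  #
  #   sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
  #   sock.connect(sockname)  # within _CLIENT_CONNECT_TIMEOUT seconds
  #   ...                     # run whatever checks the test needs
  #   sock.send("x")          # any single byte confirms the notification,
  #                           # within _CLIENT_CONFIRM_TIMEOUT seconds
  #   sock.close()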

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  # pylint: disable-msg=R0902
  # lots of instance attributes
  _ALLO_KEYS = [
    "name", "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "name", "relocate_from",
    ]
  _EVAC_KEYS = [
    "evac_nodes",
    ]

  def __init__(self, cfg, rpc, mode, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    self.name = None
    self.evac_nodes = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.result = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
      fn = self._AddNewInstance
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
      fn = self._AddRelocateInstance
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      keyset = self._EVAC_KEYS
      fn = self._AddEvacuateNodes
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])

    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData(fn)

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    ninfo = cfg.GetAllNodesInfo()
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_list = [n.name for n in ninfo.values() if n.vm_capable]

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
      hypervisor_name = cluster_info.enabled_hypervisors[0]

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)

    data["nodegroups"] = self._ComputeNodeGroupData(cfg)

    config_ndata = self._ComputeBasicNodeData(ninfo)
    data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
                                                 i_list, config_ndata)
    assert len(data["nodes"]) == len(ninfo), \
        "Incomplete node data computed"

    data["instances"] = self._ComputeInstanceData(cluster_info, i_list)

    self.in_data = data

  @staticmethod
  def _ComputeNodeGroupData(cfg):
    """Compute node groups data.

    """
    ng = {}
    for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
      ng[guuid] = {
        "name": gdata.name,
        "alloc_policy": gdata.alloc_policy,
        }
    return ng

  @staticmethod
  def _ComputeBasicNodeData(node_cfg):
    """Compute global node data.

    @rtype: dict
    @return: a dict of name: node dict, holding the static (config-derived)
        data for each node

    """
    node_results = {}
    for ninfo in node_cfg.values():
      # fill in static (config-based) values
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        "group": ninfo.group,
        "master_capable": ninfo.master_capable,
        "vm_capable": ninfo.vm_capable,
        }

      node_results[ninfo.name] = pnr

    return node_results

  @staticmethod
  def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
                              node_results):
    """Compute global node data.

    @param node_results: the basic node structures as filled from the config

    """
    # make a copy of the current dict
    node_results = dict(node_results)
    for nname, nresult in node_data.items():
      assert nname in node_results, "Missing basic data for node %s" % nname
      ninfo = node_cfg[nname]

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr_dyn.update(node_results[nname])
        node_results[nname] = pnr_dyn

    return node_results

  @staticmethod
  def _ComputeInstanceData(cluster_info, i_list):
    """Compute global instance data.

    """
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    return instance_data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_INT_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    return request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_MIRRORED:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if instance.disk_template in constants.DTS_INT_MIRROR and \
        len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    return request

  def _AddEvacuateNodes(self):
    """Add evacuate nodes data to allocator structure.

    """
    request = {
      "evac_nodes": self.evac_nodes
      }
    return request

  def _BuildInputData(self, fn):
    """Build input data structures.

    """
    self._ComputeClusterData()

    request = fn()
    request["type"] = self.mode
    self.in_data["request"] = request

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    # TODO: remove backwards compatibility in later versions
    if "nodes" in rdict and "result" not in rdict:
      rdict["result"] = rdict["nodes"]
      del rdict["nodes"]

    for key in "success", "info", "result":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["result"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the test direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      raise errors.ProgrammerError("Unhandled mode %s in"
                                   " LUTestAllocator.Exec", self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result


#: Query type implementations
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP


def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}

  """
  try:
    return _QUERY_IMPL[name]
  except KeyError:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)
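  # A hedged sketch of the document serialized into self.in_text above; all
  # concrete values (names, hypervisor, version number) are made up for
  # illustration:
  #
  #   {
  #     "version": ...,               # constants.IALLOCATOR_VERSION
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "enabled_hypervisors": ["xen-pvm"],
  #     "nodegroups": {...},          # from _ComputeNodeGroupData
  #     "nodes": {...},               # static plus dynamic node data
  #     "instances": {...},           # from _ComputeInstanceData
  #     "request": {"type": "...", "name": "...", ...},
  #     }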
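# A hedged sketch of a reply that _ValidateResult above would accept for an
# allocation request; the node name is hypothetical:
#
#   {
#     "success": true,
#     "info": "allocation successful",
#     "result": ["node2.example.com"]
#   }
#
# The only hard requirements enforced above are that the document is a dict
# with the keys "success", "info" and "result", and that "result" is a list;
# older scripts returning "nodes" instead of "result" are still accepted.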
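# A hedged usage sketch: a caller would resolve the implementation for a
# validated query with, for example,
#
#   impl_cls = _GetQueryImplementation(constants.QR_NODE)   # -> _NodeQuery
#
# and then instantiate impl_cls with the filter and field list expected by
# the generic query machinery; an unknown resource name raises
# errors.OpPrereqError with ECODE_INVAL as implemented above.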