
root / lib / cmdlib.py @ 25a8792c


#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0201

# W0201 since most LU attributes are defined in CheckPrereq or similar
# functions

import os
import os.path
import time
import re
import platform
import logging
import copy
import OpenSSL

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103
    # support for dry-run
    self.dry_run_result = None
    # support for generic debug attribute
    if (not hasattr(self.op, "debug_level") or
        not isinstance(self.op.debug_level, int)):
      self.op.debug_level = 0

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensure
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer have to worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have the 'GANETI_' prefix, as this will
    be handled by the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    If there are no nodes to return, an empty list (and not None) should be
    used.

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can override it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # API must be kept, thus we ignore the unused-argument and
    # could-be-a-function warnings
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to lock only some instances' nodes,
    or to lock only primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    """
    assert False, "BuildHooksEnv called for NoHooksLUs"


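# Illustrative sketch only, not part of the original module: a minimal,
# hypothetical LU following the rules from the LogicalUnit docstring above
# (the opcode field "node_name" and the class name are invented here).
#
#   class LUExampleNoop(NoHooksLU):
#     """Example LU: verify a node name, lock that node, do nothing."""
#     _OP_REQP = ["node_name"]   # checked for presence in LogicalUnit.__init__
#     REQ_BGL = False            # we declare our own (non-BGL) locks
#
#     def ExpandNames(self):
#       # canonicalize the node name and lock just that node
#       self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
#       self.needed_locks = {locking.LEVEL_NODE: [self.op.node_name]}
#
#     def CheckPrereq(self):
#       _CheckNodeOnline(self, self.op.node_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Nothing to do on %s" % self.op.node_name)

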
class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


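# Illustrative sketch only, not part of the original module: how a
# hypothetical LU could delegate its work to tasklets.  The base class
# (see LogicalUnit.CheckPrereq/Exec above) iterates over self.tasklets
# when it is not None.  All names used here are invented.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, node_name):
#       Tasklet.__init__(self, lu)
#       self.node_name = node_name
#
#     def CheckPrereq(self):
#       _CheckNodeOnline(self.lu, self.node_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Tasklet ran for %s" % self.node_name)
#
#   # ...and in the owning LU's ExpandNames, after locking is set up:
#   #   self.tasklets = [_ExampleTasklet(self, self.op.target_node)]

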
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names to check and expand
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpPrereqError: if the nodes parameter is not a list
  @raise errors.ProgrammerError: if the nodes parameter is empty

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if instances:
    wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta), errors.ECODE_INVAL)


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)), errors.ECODE_INVAL)
  setattr(op, name, val)


def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  This will ensure that instances don't get customised versions of
  global params.

  """
  used_globals = constants.HVC_GLOBALS.intersection(params)
  if used_globals:
    msg = ("The following hypervisor parameters are global and cannot"
           " be customized at instance level, please modify them at"
           " cluster level: %s" % utils.CommaJoin(used_globals))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node,
                               errors.ECODE_INVAL)


def _CheckNodeHasOS(lu, node, os_name, force_variant):
  """Ensure that a node supports a given OS.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @param os_name: the OS to query about
  @param force_variant: whether to ignore variant errors
  @raise errors.OpPrereqError: if the node does not support the OS

  """
  result = lu.rpc.call_os_get(node, os_name)
  result.Raise("OS '%s' not in supported OS list for node %s" %
               (os_name, node),
               prereq=True, ecode=errors.ECODE_INVAL)
  if not force_variant:
    _CheckOSVariant(result.payload, os_name)


def _CheckDiskTemplate(template):
  """Ensure a given disk template is valid.

  """
  if template not in constants.DISK_TEMPLATES:
    msg = ("Invalid disk template name '%s', valid templates are: %s" %
           (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
    raise errors.OpPrereqError(msg, errors.ECODE_INVAL)


def _CheckInstanceDown(lu, instance, reason):
  """Ensure that an instance is not running."""
  if instance.admin_up:
    raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
                               (instance.name, reason), errors.ECODE_STATE)

  pnode = instance.primary_node
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
  ins_l.Raise("Can't contact node %s for instance information" % pnode,
              prereq=True, ecode=errors.ECODE_ENVIRON)

  if instance.name in ins_l.payload:
    raise errors.OpPrereqError("Instance %s is running, %s" %
                               (instance.name, reason), errors.ECODE_STATE)


def _ExpandItemName(fn, name, kind):
  """Expand an item name.

  @param fn: the function to use for expansion
  @param name: requested item name
  @param kind: text description ('Node' or 'Instance')
  @return: the resolved (full) name
  @raise errors.OpPrereqError: if the item is not found

  """
  full_name = fn(name)
  if full_name is None:
    raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
                               errors.ECODE_NOENT)
  return full_name


def _ExpandNodeName(cfg, name):
  """Wrapper over L{_ExpandItemName} for nodes."""
  return _ExpandItemName(cfg.ExpandNodeName, name, "Node")


def _ExpandInstanceName(cfg, name):
  """Wrapper over L{_ExpandItemName} for instance."""
  return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


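# Illustrative sketch only, not part of the original module: for a
# hypothetical instance with one bridged NIC and one disk, the helper above
# would produce an environment along these lines (values are examples):
#
#   env = _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
#                               ["node2.example.com"], "debootstrap", True,
#                               512, 1,
#                               [("198.51.100.10", "aa:00:00:11:22:33",
#                                 constants.NIC_MODE_BRIDGED, "xen-br0")],
#                               "drbd", [(10240, "rw")], {}, {}, "xen-pvm")
#   # env["INSTANCE_STATUS"] == "up"
#   # env["INSTANCE_NIC_COUNT"] == 1
#   # env["INSTANCE_NIC0_BRIDGE"] == "xen-br0"
#   # env["INSTANCE_DISK0_SIZE"] == 10240
#   # env["INSTANCE_DISK_COUNT"] == 1

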
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142


def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool(exceptions)
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  """
  cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
  mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node will increase mc_max by one, so:
  mc_should = min(mc_should + 1, cp_size)
  return mc_now < mc_should


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity

  """
  if not os_obj.supported_variants:
    return
  try:
    variant = name.split("+", 1)[1]
  except IndexError:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if variant not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)


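# Illustrative sketch only, not part of the original module: for an OS
# object whose supported_variants is ["lenny", "squeeze"], the user-supplied
# name must carry the variant after a '+' sign:
#
#   _CheckOSVariant(os_obj, "debootstrap+squeeze")  # passes
#   _CheckOSVariant(os_obj, "debootstrap")          # OpPrereqError: name must
#                                                   # include a variant
#   _CheckOSVariant(os_obj, "debootstrap+etch")     # OpPrereqError: unsupported
#                                                   # OS variant

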
def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


def _FormatTimestamp(secs):
  """Formats a Unix timestamp with the local timezone.

  """
  return time.strftime("%F %T %Z", time.gmtime(secs))


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      utils.CreateBackup(priv_key)
      utils.CreateBackup(pub_key)

    return master


def _VerifyCertificateInner(filename, expired, not_before, not_after, now,
                            warn_days=constants.SSL_CERT_EXPIRATION_WARN,
                            error_days=constants.SSL_CERT_EXPIRATION_ERROR):
  """Verifies certificate details for LUVerifyCluster.

  """
  if expired:
    msg = "Certificate %s is expired" % filename

    if not_before is not None and not_after is not None:
      msg += (" (valid from %s to %s)" %
              (_FormatTimestamp(not_before),
               _FormatTimestamp(not_after)))
    elif not_before is not None:
      msg += " (valid from %s)" % _FormatTimestamp(not_before)
    elif not_after is not None:
      msg += " (valid until %s)" % _FormatTimestamp(not_after)

    return (LUVerifyCluster.ETYPE_ERROR, msg)

  elif not_before is not None and not_before > now:
    return (LUVerifyCluster.ETYPE_WARNING,
            "Certificate %s not yet valid (valid from %s)" %
            (filename, _FormatTimestamp(not_before)))

  elif not_after is not None:
    remaining_days = int((not_after - now) / (24 * 3600))

    msg = ("Certificate %s expires in %d days" % (filename, remaining_days))

    if remaining_days <= error_days:
      return (LUVerifyCluster.ETYPE_ERROR, msg)

    if remaining_days <= warn_days:
      return (LUVerifyCluster.ETYPE_WARNING, msg)

  return (None, None)


def _VerifyCertificate(filename):
  """Verifies a certificate for LUVerifyCluster.

  @type filename: string
  @param filename: Path to PEM file

  """
  try:
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           utils.ReadFile(filename))
  except Exception, err: # pylint: disable-msg=W0703
    return (LUVerifyCluster.ETYPE_ERROR,
            "Failed to load X509 certificate %s: %s" % (filename, err))

  # Depending on the pyOpenSSL version, this can just return (None, None)
  (not_before, not_after) = utils.GetX509CertValidity(cert)

  return _VerifyCertificateInner(filename, cert.has_expired(),
                                 not_before, not_after, time.time())


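# Illustrative sketch only, not part of the original module: with the
# default thresholds an expired certificate is reported as an error, and one
# close to expiry is downgraded to a warning or error depending on the
# remaining days (example values below):
#
#   now = time.time()
#   _VerifyCertificateInner("server.pem", True, None, None, now)
#   # -> (LUVerifyCluster.ETYPE_ERROR, "Certificate server.pem is expired")
#
#   _VerifyCertificateInner("server.pem", False, now - 3600,
#                           now + 5 * 24 * 3600, now,
#                           warn_days=30, error_days=7)
#   # -> (LUVerifyCluster.ETYPE_ERROR,
#   #     "Certificate server.pem expires in 5 days")

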
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec.

    """
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    itype, etxt = ecode
    # first complete the msg
    if args:
      msg = msg % args
    # then format the whole message
    if self.op.error_codes:
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
    else:
      if item:
        item = " " + item
      else:
        item = ""
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % msg)

  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    """
    cond = bool(cond) or self.op.debug_simulate_errors
    if cond:
      self._Error(*args, **kwargs)
    # do not mark the operation as failed for WARN cases only
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
      self.bad = self.bad or cond

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used DRBD minors for this node, in
        the form of minor: (instance, must_exist), which correspond to
        instances and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, node_result should be a non-empty dict
    test = not node_result or not isinstance(node_result, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return

    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      test = not vglist
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
      if not test:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    test = not isinstance(remote_cksum, dict)
    _ErrorIf(test, self.ENODEFILECHECK, node,
             "node hasn't returned file checksum data")
    if not test:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        # missing
        test1 = file_name not in remote_cksum
        # invalid checksum
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
        # existing and good
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' missing", file_name)
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
                 "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist on non master"
                 " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
                 "file '%s' should not exist"
                 " on non master candidates", file_name)

    # checks ssh to any

    test = constants.NV_NODELIST not in node_result
    _ErrorIf(test, self.ENODESSH, node,
             "node hasn't returned node ssh connectivity data")
    if not test:
      if node_result[constants.NV_NODELIST]:
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
          _ErrorIf(True, self.ENODESSH, node,
                   "ssh communication with node '%s': %s", a_node, a_msg)

    test = constants.NV_NODENETTEST not in node_result
    _ErrorIf(test, self.ENODENET, node,
             "node hasn't returned node tcp connectivity data")
    if not test:
      if node_result[constants.NV_NODENETTEST]:
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for anode in nlist:
          _ErrorIf(True, self.ENODENET, node,
                   "tcp communication with node '%s': %s",
                   anode, node_result[constants.NV_NODENETTEST][anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      test = not isinstance(used_minors, (tuple, list))
      _ErrorIf(test, self.ENODEDRBD, node,
               "cannot parse drbd status file: %s", str(used_minors))
      if not test:
        for minor, (iname, must_exist) in drbd_map.items():
          test = minor not in used_minors and must_exist
          _ErrorIf(test, self.ENODEDRBD, node,
                   "drbd minor %d of instance %s is not active",
                   minor, iname)
        for minor in used_minors:
          test = minor not in drbd_map
          _ErrorIf(test, self.ENODEDRBD, node,
                   "unallocated drbd minor %d is in use", minor)
    test = node_result.get(constants.NV_NODESETUP,
                           ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    # check pv names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      test = pvlist is None
      _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
      if not test:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          test = ":" in pvname
          _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
                   " '%s' of VG '%s'", pvname, owner_vg)

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        test = node not in node_vol_is or volume not in node_vol_is[node]
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      test = ((node_current not in node_instance or
               not instance in node_instance[node_current]) and
              node_current not in n_offline)
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    for node in node_instance:
      if (not node == node_current):
        test = instance in node_instance[node]
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    for node in node_vol_is:
      for volume in node_vol_is[node]:
        test = (node not in node_vol_should or
                volume not in node_vol_should[node])
        self._ErrorIf(test, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)

  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    for node in node_instance:
      for o_inst in node_instance[node]:
        test = o_inst not in instancelist
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all the instances it is secondary for, should a
      # single other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks run only in the post phase; their failure is logged
    in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
1540
                       nresult, master_files, node_drbd, vg_name)
1541

    
1542
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1543
      if vg_name is None:
1544
        node_volume[node] = {}
1545
      elif isinstance(lvdata, basestring):
1546
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1547
                 utils.SafeEncode(lvdata))
1548
        node_volume[node] = {}
1549
      elif not isinstance(lvdata, dict):
1550
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1551
        continue
1552
      else:
1553
        node_volume[node] = lvdata
1554

    
1555
      # node_instance
1556
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1557
      test = not isinstance(idata, list)
1558
      _ErrorIf(test, self.ENODEHV, node,
1559
               "rpc call to node failed (instancelist): %s",
1560
               utils.SafeEncode(str(idata)))
1561
      if test:
1562
        continue
1563

    
1564
      node_instance[node] = idata
1565

    
1566
      # node_info
1567
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1568
      test = not isinstance(nodeinfo, dict)
1569
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1570
      if test:
1571
        continue
1572

    
1573
      # Node time
1574
      ntime = nresult.get(constants.NV_TIME, None)
1575
      try:
1576
        ntime_merged = utils.MergeTime(ntime)
1577
      except (ValueError, TypeError):
1578
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1579

    
1580
      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1581
        ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1582
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1583
        ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1584
      else:
1585
        ntime_diff = None
1586

    
1587
      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1588
               "Node time diverges by at least %s from master node time",
1589
               ntime_diff)
1590

    
1591
      if ntime_diff is not None:
1592
        continue
1593

    
1594
      try:
1595
        node_info[node] = {
1596
          "mfree": int(nodeinfo['memory_free']),
1597
          "pinst": [],
1598
          "sinst": [],
1599
          # dictionary holding all instances this node is secondary for,
1600
          # grouped by their primary node. Each key is a cluster node, and each
1601
          # value is a list of instances which have the key as primary and the
1602
          # current node as secondary.  this is handy to calculate N+1 memory
1603
          # availability if you can only failover from a primary to its
1604
          # secondary.
1605
          "sinst-by-pnode": {},
1606
        }
1607
        # FIXME: devise a free space model for file based instances as well
1608
        if vg_name is not None:
1609
          test = (constants.NV_VGLIST not in nresult or
1610
                  vg_name not in nresult[constants.NV_VGLIST])
1611
          _ErrorIf(test, self.ENODELVM, node,
1612
                   "node didn't return data for the volume group '%s'"
1613
                   " - it is either missing or broken", vg_name)
1614
          if test:
1615
            continue
1616
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1617
      except (ValueError, KeyError):
1618
        _ErrorIf(True, self.ENODERPC, node,
1619
                 "node returned invalid nodeinfo, check lvm/hypervisor")
1620
        continue
1621

    
1622
    node_vol_should = {}
1623

    
1624
    feedback_fn("* Verifying instance status")
1625
    for instance in instancelist:
1626
      if verbose:
1627
        feedback_fn("* Verifying instance %s" % instance)
1628
      inst_config = instanceinfo[instance]
1629
      self._VerifyInstance(instance, inst_config, node_volume,
1630
                           node_instance, n_offline)
1631
      inst_nodes_offline = []
1632

    
1633
      inst_config.MapLVsByNode(node_vol_should)
1634

    
1635
      instance_cfg[instance] = inst_config
1636

    
1637
      pnode = inst_config.primary_node
1638
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
1639
               self.ENODERPC, pnode, "instance %s, connection to"
1640
               " primary node failed", instance)
1641
      if pnode in node_info:
1642
        node_info[pnode]['pinst'].append(instance)
1643

    
1644
      if pnode in n_offline:
1645
        inst_nodes_offline.append(pnode)
1646

    
1647
      # If the instance is non-redundant we cannot survive losing its primary
1648
      # node, so we are not N+1 compliant. On the other hand we have no disk
1649
      # templates with more than one secondary so that situation is not well
1650
      # supported either.
1651
      # FIXME: does not support file-backed instances
1652
      if len(inst_config.secondary_nodes) == 0:
1653
        i_non_redundant.append(instance)
1654
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
1655
               self.EINSTANCELAYOUT, instance,
1656
               "instance has multiple secondary nodes", code="WARNING")
1657

    
1658
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1659
        i_non_a_balanced.append(instance)
1660

    
1661
      for snode in inst_config.secondary_nodes:
1662
        _ErrorIf(snode not in node_info and snode not in n_offline,
1663
                 self.ENODERPC, snode,
1664
                 "instance %s, connection to secondary node"
1665
                 " failed", instance)
1666

    
1667
        if snode in node_info:
1668
          node_info[snode]['sinst'].append(instance)
1669
          if pnode not in node_info[snode]['sinst-by-pnode']:
1670
            node_info[snode]['sinst-by-pnode'][pnode] = []
1671
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1672

    
1673
        if snode in n_offline:
1674
          inst_nodes_offline.append(snode)
1675

    
1676
      # warn that the instance lives on offline nodes
1677
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1678
               "instance lives on offline node(s) %s",
1679
               utils.CommaJoin(inst_nodes_offline))
1680

    
1681
    feedback_fn("* Verifying orphan volumes")
1682
    self._VerifyOrphanVolumes(node_vol_should, node_volume)
1683

    
1684
    feedback_fn("* Verifying remaining instances")
1685
    self._VerifyOrphanInstances(instancelist, node_instance)
1686

    
1687
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1688
      feedback_fn("* Verifying N+1 Memory redundancy")
1689
      self._VerifyNPlusOneMemory(node_info, instance_cfg)
1690

    
1691
    feedback_fn("* Other Notes")
1692
    if i_non_redundant:
1693
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1694
                  % len(i_non_redundant))
1695

    
1696
    if i_non_a_balanced:
1697
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1698
                  % len(i_non_a_balanced))
1699

    
1700
    if n_offline:
1701
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1702

    
1703
    if n_drained:
1704
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1705

    
1706
    return not self.bad
1707

    
1708
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if res.offline or msg:
          # No need to investigate payload if node is offline or gave an error.
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 0

      return lu_result

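# Illustrative sketch (not part of the original code): the indentation trick
# used by HooksCallBack above, shown standalone.  re.compile('^', re.M)
# matches the beginning of every line, so sub() prefixes each line of a
# multi-line hook output with six spaces:
#
#   >>> re.compile('^', re.M).sub('      ', "line1\nline2")
#   '      line1\n      line2'
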
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, _, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result

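# Illustrative sketch (not part of the original code): a hypothetical caller
# of LUVerifyDisks would consume the result tuple documented above like so
# (all names below are invented for the example):
#
#   node_errors, need_activate, missing = op_result
#   for iname in need_activate:
#     feedback_fn("instance %s needs activate-disks" % iname)
#   for iname, keys in missing.items():
#     for (node, vol) in keys:
#       feedback_fn("instance %s: LV %s missing on node %s" %
#                   (iname, vol, node))
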
class LURepairDiskSizes(NoHooksLU):
1839
  """Verifies the cluster disks sizes.
1840

1841
  """
1842
  _OP_REQP = ["instances"]
1843
  REQ_BGL = False
1844

    
1845
  def ExpandNames(self):
1846
    if not isinstance(self.op.instances, list):
1847
      raise errors.OpPrereqError("Invalid argument type 'instances'",
1848
                                 errors.ECODE_INVAL)
1849

    
1850
    if self.op.instances:
1851
      self.wanted_names = []
1852
      for name in self.op.instances:
1853
        full_name = _ExpandInstanceName(self.cfg, name)
1854
        self.wanted_names.append(full_name)
1855
      self.needed_locks = {
1856
        locking.LEVEL_NODE: [],
1857
        locking.LEVEL_INSTANCE: self.wanted_names,
1858
        }
1859
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1860
    else:
1861
      self.wanted_names = None
1862
      self.needed_locks = {
1863
        locking.LEVEL_NODE: locking.ALL_SET,
1864
        locking.LEVEL_INSTANCE: locking.ALL_SET,
1865
        }
1866
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1867

    
1868
  def DeclareLocks(self, level):
1869
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1870
      self._LockInstancesNodes(primary_only=True)
1871

    
1872
  def CheckPrereq(self):
1873
    """Check prerequisites.
1874

1875
    This only checks the optional instance list against the existing names.
1876

1877
    """
1878
    if self.wanted_names is None:
1879
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1880

    
1881
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1882
                             in self.wanted_names]
1883

    
1884
  def _EnsureChildSizes(self, disk):
1885
    """Ensure children of the disk have the needed disk size.
1886

1887
    This is valid mainly for DRBD8 and fixes an issue where the
1888
    children have smaller disk size.
1889

1890
    @param disk: an L{ganeti.objects.Disk} object
1891

1892
    """
1893
    if disk.dev_type == constants.LD_DRBD8:
1894
      assert disk.children, "Empty children for DRBD8?"
1895
      fchild = disk.children[0]
1896
      mismatch = fchild.size < disk.size
1897
      if mismatch:
1898
        self.LogInfo("Child disk has size %d, parent %d, fixing",
1899
                     fchild.size, disk.size)
1900
        fchild.size = disk.size
1901

    
1902
      # and we recurse on this child only, not on the metadev
1903
      return self._EnsureChildSizes(fchild) or mismatch
1904
    else:
1905
      return False
1906

    
1907
  def Exec(self, feedback_fn):
1908
    """Verify the size of cluster disks.
1909

1910
    """
1911
    # TODO: check child disks too
1912
    # TODO: check differences in size between primary/secondary nodes
1913
    per_node_disks = {}
1914
    for instance in self.wanted_instances:
1915
      pnode = instance.primary_node
1916
      if pnode not in per_node_disks:
1917
        per_node_disks[pnode] = []
1918
      for idx, disk in enumerate(instance.disks):
1919
        per_node_disks[pnode].append((instance, idx, disk))
1920

    
1921
    changed = []
1922
    for node, dskl in per_node_disks.items():
1923
      newl = [v[2].Copy() for v in dskl]
1924
      for dsk in newl:
1925
        self.cfg.SetDiskID(dsk, node)
1926
      result = self.rpc.call_blockdev_getsizes(node, newl)
1927
      if result.fail_msg:
1928
        self.LogWarning("Failure in blockdev_getsizes call to node"
1929
                        " %s, ignoring", node)
1930
        continue
1931
      if len(result.data) != len(dskl):
1932
        self.LogWarning("Invalid result from node %s, ignoring node results",
1933
                        node)
1934
        continue
1935
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1936
        if size is None:
1937
          self.LogWarning("Disk %d of instance %s did not return size"
1938
                          " information, ignoring", idx, instance.name)
1939
          continue
1940
        if not isinstance(size, (int, long)):
1941
          self.LogWarning("Disk %d of instance %s did not return valid"
1942
                          " size information, ignoring", idx, instance.name)
1943
          continue
1944
        size = size >> 20
1945
        if size != disk.size:
1946
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1947
                       " correcting: recorded %d, actual %d", idx,
1948
                       instance.name, disk.size, size)
1949
          disk.size = size
1950
          self.cfg.Update(instance, feedback_fn)
1951
          changed.append((instance.name, idx, size))
1952
        if self._EnsureChildSizes(disk):
1953
          self.cfg.Update(instance, feedback_fn)
1954
          changed.append((instance.name, idx, disk.size))
1955
    return changed
1956

    
1957

    
1958
class LURenameCluster(LogicalUnit):
1959
  """Rename the cluster.
1960

1961
  """
1962
  HPATH = "cluster-rename"
1963
  HTYPE = constants.HTYPE_CLUSTER
1964
  _OP_REQP = ["name"]
1965

    
1966
  def BuildHooksEnv(self):
1967
    """Build hooks env.
1968

1969
    """
1970
    env = {
1971
      "OP_TARGET": self.cfg.GetClusterName(),
1972
      "NEW_NAME": self.op.name,
1973
      }
1974
    mn = self.cfg.GetMasterNode()
1975
    all_nodes = self.cfg.GetNodeList()
1976
    return env, [mn], all_nodes
1977

    
1978
  def CheckPrereq(self):
1979
    """Verify that the passed name is a valid one.
1980

1981
    """
1982
    hostname = utils.GetHostInfo(self.op.name)
1983

    
1984
    new_name = hostname.name
1985
    self.ip = new_ip = hostname.ip
1986
    old_name = self.cfg.GetClusterName()
1987
    old_ip = self.cfg.GetMasterIP()
1988
    if new_name == old_name and new_ip == old_ip:
1989
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1990
                                 " cluster has changed",
1991
                                 errors.ECODE_INVAL)
1992
    if new_ip != old_ip:
1993
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1994
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1995
                                   " reachable on the network. Aborting." %
1996
                                   new_ip, errors.ECODE_NOTUNIQUE)
1997

    
1998
    self.op.name = new_name
1999

    
2000
  def Exec(self, feedback_fn):
2001
    """Rename the cluster.
2002

2003
    """
2004
    clustername = self.op.name
2005
    ip = self.ip
2006

    
2007
    # shutdown the master IP
2008
    master = self.cfg.GetMasterNode()
2009
    result = self.rpc.call_node_stop_master(master, False)
2010
    result.Raise("Could not disable the master role")
2011

    
2012
    try:
2013
      cluster = self.cfg.GetClusterInfo()
2014
      cluster.cluster_name = clustername
2015
      cluster.master_ip = ip
2016
      self.cfg.Update(cluster, feedback_fn)
2017

    
2018
      # update the known hosts file
2019
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2020
      node_list = self.cfg.GetNodeList()
2021
      try:
2022
        node_list.remove(master)
2023
      except ValueError:
2024
        pass
2025
      result = self.rpc.call_upload_file(node_list,
2026
                                         constants.SSH_KNOWN_HOSTS_FILE)
2027
      for to_node, to_result in result.iteritems():
2028
        msg = to_result.fail_msg
2029
        if msg:
2030
          msg = ("Copy of file %s to node %s failed: %s" %
2031
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2032
          self.proc.LogWarning(msg)
2033

    
2034
    finally:
2035
      result = self.rpc.call_node_start_master(master, False, False)
2036
      msg = result.fail_msg
2037
      if msg:
2038
        self.LogWarning("Could not re-enable the master role on"
2039
                        " the master, please restart manually: %s", msg)
2040

    
2041

    
2042
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV

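# Illustrative sketch (not part of the original code): LUSetClusterParams
# below uses _RecursiveCheckIfLVMBased to refuse disabling LVM storage while
# LVM-backed disks exist.  A hypothetical standalone check over a list of
# disks could look like:
#
#   def _AnyLVMBasedDisk(disks):
#     for disk in disks:
#       if _RecursiveCheckIfLVMBased(disk):
#         return True
#     return False
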
class LUSetClusterParams(LogicalUnit):
2059
  """Change the parameters of the cluster.
2060

2061
  """
2062
  HPATH = "cluster-modify"
2063
  HTYPE = constants.HTYPE_CLUSTER
2064
  _OP_REQP = []
2065
  REQ_BGL = False
2066

    
2067
  def CheckArguments(self):
2068
    """Check parameters
2069

2070
    """
2071
    if not hasattr(self.op, "candidate_pool_size"):
2072
      self.op.candidate_pool_size = None
2073
    if self.op.candidate_pool_size is not None:
2074
      try:
2075
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2076
      except (ValueError, TypeError), err:
2077
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2078
                                   str(err), errors.ECODE_INVAL)
2079
      if self.op.candidate_pool_size < 1:
2080
        raise errors.OpPrereqError("At least one master candidate needed",
2081
                                   errors.ECODE_INVAL)
2082

    
2083
  def ExpandNames(self):
2084
    # FIXME: in the future maybe other cluster params won't require checking on
2085
    # all nodes to be modified.
2086
    self.needed_locks = {
2087
      locking.LEVEL_NODE: locking.ALL_SET,
2088
    }
2089
    self.share_locks[locking.LEVEL_NODE] = 1
2090

    
2091
  def BuildHooksEnv(self):
2092
    """Build hooks env.
2093

2094
    """
2095
    env = {
2096
      "OP_TARGET": self.cfg.GetClusterName(),
2097
      "NEW_VG_NAME": self.op.vg_name,
2098
      }
2099
    mn = self.cfg.GetMasterNode()
2100
    return env, [mn], [mn]
2101

    
2102
  def CheckPrereq(self):
2103
    """Check prerequisites.
2104

2105
    This checks whether the given params don't conflict and
2106
    if the given volume group is valid.
2107

2108
    """
2109
    if self.op.vg_name is not None and not self.op.vg_name:
2110
      instances = self.cfg.GetAllInstancesInfo().values()
2111
      for inst in instances:
2112
        for disk in inst.disks:
2113
          if _RecursiveCheckIfLVMBased(disk):
2114
            raise errors.OpPrereqError("Cannot disable lvm storage while"
2115
                                       " lvm-based instances exist",
2116
                                       errors.ECODE_INVAL)
2117

    
2118
    node_list = self.acquired_locks[locking.LEVEL_NODE]
2119

    
2120
    # if vg_name not None, checks given volume group on all nodes
2121
    if self.op.vg_name:
2122
      vglist = self.rpc.call_vg_list(node_list)
2123
      for node in node_list:
2124
        msg = vglist[node].fail_msg
2125
        if msg:
2126
          # ignoring down node
2127
          self.LogWarning("Error while gathering data on node %s"
2128
                          " (ignoring node): %s", node, msg)
2129
          continue
2130
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2131
                                              self.op.vg_name,
2132
                                              constants.MIN_VG_SIZE)
2133
        if vgstatus:
2134
          raise errors.OpPrereqError("Error on node '%s': %s" %
2135
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2136

    
2137
    self.cluster = cluster = self.cfg.GetClusterInfo()
2138
    # validate params changes
2139
    if self.op.beparams:
2140
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2141
      self.new_beparams = objects.FillDict(
2142
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2143

    
2144
    if self.op.nicparams:
2145
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2146
      self.new_nicparams = objects.FillDict(
2147
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2148
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2149
      nic_errors = []
2150

    
2151
      # check all instances for consistency
2152
      for instance in self.cfg.GetAllInstancesInfo().values():
2153
        for nic_idx, nic in enumerate(instance.nics):
2154
          params_copy = copy.deepcopy(nic.nicparams)
2155
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2156

    
2157
          # check parameter syntax
2158
          try:
2159
            objects.NIC.CheckParameterSyntax(params_filled)
2160
          except errors.ConfigurationError, err:
2161
            nic_errors.append("Instance %s, nic/%d: %s" %
2162
                              (instance.name, nic_idx, err))
2163

    
2164
          # if we're moving instances to routed, check that they have an ip
2165
          target_mode = params_filled[constants.NIC_MODE]
2166
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2167
            nic_errors.append("Instance %s, nic/%d: routed nic with no ip" %
2168
                              (instance.name, nic_idx))
2169
      if nic_errors:
2170
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2171
                                   "\n".join(nic_errors))
2172

    
2173
    # hypervisor list/parameters
2174
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2175
    if self.op.hvparams:
2176
      if not isinstance(self.op.hvparams, dict):
2177
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2178
                                   errors.ECODE_INVAL)
2179
      for hv_name, hv_dict in self.op.hvparams.items():
2180
        if hv_name not in self.new_hvparams:
2181
          self.new_hvparams[hv_name] = hv_dict
2182
        else:
2183
          self.new_hvparams[hv_name].update(hv_dict)
2184

    
2185
    # os hypervisor parameters
2186
    self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2187
    if self.op.os_hvp:
2188
      if not isinstance(self.op.os_hvp, dict):
2189
        raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2190
                                   errors.ECODE_INVAL)
2191
      for os_name, hvs in self.op.os_hvp.items():
2192
        if not isinstance(hvs, dict):
2193
          raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2194
                                      " input"), errors.ECODE_INVAL)
2195
        if os_name not in self.new_os_hvp:
2196
          self.new_os_hvp[os_name] = hvs
2197
        else:
2198
          for hv_name, hv_dict in hvs.items():
2199
            if hv_name not in self.new_os_hvp[os_name]:
2200
              self.new_os_hvp[os_name][hv_name] = hv_dict
2201
            else:
2202
              self.new_os_hvp[os_name][hv_name].update(hv_dict)
2203

    
2204
    if self.op.enabled_hypervisors is not None:
2205
      self.hv_list = self.op.enabled_hypervisors
2206
      if not self.hv_list:
2207
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2208
                                   " least one member",
2209
                                   errors.ECODE_INVAL)
2210
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2211
      if invalid_hvs:
2212
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2213
                                   " entries: %s" %
2214
                                   utils.CommaJoin(invalid_hvs),
2215
                                   errors.ECODE_INVAL)
2216
    else:
2217
      self.hv_list = cluster.enabled_hypervisors
2218

    
2219
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2220
      # either the enabled list has changed, or the parameters have, validate
2221
      for hv_name, hv_params in self.new_hvparams.items():
2222
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2223
            (self.op.enabled_hypervisors and
2224
             hv_name in self.op.enabled_hypervisors)):
2225
          # either this is a new hypervisor, or its parameters have changed
2226
          hv_class = hypervisor.GetHypervisor(hv_name)
2227
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2228
          hv_class.CheckParameterSyntax(hv_params)
2229
          _CheckHVParams(self, node_list, hv_name, hv_params)
2230

    
2231
    if self.op.os_hvp:
2232
      # no need to check any newly-enabled hypervisors, since the
2233
      # defaults have already been checked in the above code-block
2234
      for os_name, os_hvp in self.new_os_hvp.items():
2235
        for hv_name, hv_params in os_hvp.items():
2236
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2237
          # we need to fill in the new os_hvp on top of the actual hv_p
2238
          cluster_defaults = self.new_hvparams.get(hv_name, {})
2239
          new_osp = objects.FillDict(cluster_defaults, hv_params)
2240
          hv_class = hypervisor.GetHypervisor(hv_name)
2241
          hv_class.CheckParameterSyntax(new_osp)
2242
          _CheckHVParams(self, node_list, hv_name, new_osp)
2243

    
2244

    
2245
  def Exec(self, feedback_fn):
2246
    """Change the parameters of the cluster.
2247

2248
    """
2249
    if self.op.vg_name is not None:
2250
      new_volume = self.op.vg_name
2251
      if not new_volume:
2252
        new_volume = None
2253
      if new_volume != self.cfg.GetVGName():
2254
        self.cfg.SetVGName(new_volume)
2255
      else:
2256
        feedback_fn("Cluster LVM configuration already in desired"
2257
                    " state, not changing")
2258
    if self.op.hvparams:
2259
      self.cluster.hvparams = self.new_hvparams
2260
    if self.op.os_hvp:
2261
      self.cluster.os_hvp = self.new_os_hvp
2262
    if self.op.enabled_hypervisors is not None:
2263
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2264
    if self.op.beparams:
2265
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2266
    if self.op.nicparams:
2267
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2268

    
2269
    if self.op.candidate_pool_size is not None:
2270
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2271
      # we need to update the pool size here, otherwise the save will fail
2272
      _AdjustCandidatePool(self, [])
2273

    
2274
    self.cfg.Update(self.cluster, feedback_fn)
2275

    
2276

    
2277
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetOnlineNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.CONFD_HMAC_KEY,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded

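# Note (illustrative, not part of the original code): between polls,
# _WaitForSync sleeps for the estimated remaining time reported by the last
# syncing device, capped at 60 seconds; e.g. an estimate of 250 seconds
# leads to a 60-second sleep, an estimate of 15 seconds to a 15-second one.
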
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result

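# Illustrative sketch (not part of the original code): a hypothetical caller
# that only cares about the local storage health of a device on its primary
# node would use the ldisk switch documented above:
#
#   healthy = _CheckDiskConsistency(lu, dev, primary_node, True, ldisk=True)
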
class LUDiagnoseOS(NoHooksLU):
2457
  """Logical unit for OS diagnose/query.
2458

2459
  """
2460
  _OP_REQP = ["output_fields", "names"]
2461
  REQ_BGL = False
2462
  _FIELDS_STATIC = utils.FieldSet()
2463
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
2464
  # Fields that need calculation of global os validity
2465
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])
2466

    
2467
  def ExpandNames(self):
2468
    if self.op.names:
2469
      raise errors.OpPrereqError("Selective OS query not supported",
2470
                                 errors.ECODE_INVAL)
2471

    
2472
    _CheckOutputFields(static=self._FIELDS_STATIC,
2473
                       dynamic=self._FIELDS_DYNAMIC,
2474
                       selected=self.op.output_fields)
2475

    
2476
    # Lock all nodes, in shared mode
2477
    # Temporary removal of locks, should be reverted later
2478
    # TODO: reintroduce locks when they are lighter-weight
2479
    self.needed_locks = {}
2480
    #self.share_locks[locking.LEVEL_NODE] = 1
2481
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2482

    
2483
  def CheckPrereq(self):
2484
    """Check prerequisites.
2485

2486
    """
2487

    
2488
  @staticmethod
2489
  def _DiagnoseByOS(rlist):
2490
    """Remaps a per-node return list into an a per-os per-node dictionary
2491

2492
    @param rlist: a map with node names as keys and OS objects as values
2493

2494
    @rtype: dict
2495
    @return: a dictionary with osnames as keys and as value another map, with
2496
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2497

2498
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2499
                                     (/srv/..., False, "invalid api")],
2500
                           "node2": [(/srv/..., True, "")]}
2501
          }
2502

2503
    """
2504
    all_os = {}
2505
    # we build here the list of nodes that didn't fail the RPC (at RPC
2506
    # level), so that nodes with a non-responding node daemon don't
2507
    # make all OSes invalid
2508
    good_nodes = [node_name for node_name in rlist
2509
                  if not rlist[node_name].fail_msg]
2510
    for node_name, nr in rlist.items():
2511
      if nr.fail_msg or not nr.payload:
2512
        continue
2513
      for name, path, status, diagnose, variants in nr.payload:
2514
        if name not in all_os:
2515
          # build a list of nodes for this os containing empty lists
2516
          # for each node in node_list
2517
          all_os[name] = {}
2518
          for nname in good_nodes:
2519
            all_os[name][nname] = []
2520
        all_os[name][node_name].append((path, status, diagnose, variants))
2521
    return all_os
2522

    
2523
  def Exec(self, feedback_fn):
2524
    """Compute the list of OSes.
2525

2526
    """
2527
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2528
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2529
    pol = self._DiagnoseByOS(node_data)
2530
    output = []
2531
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
2532
    calc_variants = "variants" in self.op.output_fields
2533

    
2534
    for os_name, os_data in pol.items():
2535
      row = []
2536
      if calc_valid:
2537
        valid = True
2538
        variants = None
2539
        for osl in os_data.values():
2540
          valid = valid and osl and osl[0][1]
2541
          if not valid:
2542
            variants = None
2543
            break
2544
          if calc_variants:
2545
            node_variants = osl[0][3]
2546
            if variants is None:
2547
              variants = node_variants
2548
            else:
2549
              variants = [v for v in variants if v in node_variants]
2550

    
2551
      for field in self.op.output_fields:
2552
        if field == "name":
2553
          val = os_name
2554
        elif field == "valid":
2555
          val = valid
2556
        elif field == "node_status":
2557
          # this is just a copy of the dict
2558
          val = {}
2559
          for node_name, nos_list in os_data.items():
2560
            val[node_name] = nos_list
2561
        elif field == "variants":
2562
          val =  variants
2563
        else:
2564
          raise errors.ParameterError(field)
2565
        row.append(val)
2566
      output.append(row)
2567

    
2568
    return output
2569

    
2570

    
2571
class LURemoveNode(LogicalUnit):
2572
  """Logical unit for removing a node.
2573

2574
  """
2575
  HPATH = "node-remove"
2576
  HTYPE = constants.HTYPE_NODE
2577
  _OP_REQP = ["node_name"]
2578

    
2579
  def BuildHooksEnv(self):
2580
    """Build hooks env.
2581

2582
    This doesn't run on the target node in the pre phase as a failed
2583
    node would then be impossible to remove.
2584

2585
    """
2586
    env = {
2587
      "OP_TARGET": self.op.node_name,
2588
      "NODE_NAME": self.op.node_name,
2589
      }
2590
    all_nodes = self.cfg.GetNodeList()
2591
    try:
2592
      all_nodes.remove(self.op.node_name)
2593
    except ValueError:
2594
      logging.warning("Node %s which is about to be removed not found"
2595
                      " in the all nodes list", self.op.node_name)
2596
    return env, all_nodes, all_nodes
2597

    
2598
  def CheckPrereq(self):
2599
    """Check prerequisites.
2600

2601
    This checks:
2602
     - the node exists in the configuration
2603
     - it does not have primary or secondary instances
2604
     - it's not the master
2605

2606
    Any errors are signaled by raising errors.OpPrereqError.
2607

2608
    """
2609
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2610
    node = self.cfg.GetNodeInfo(self.op.node_name)
2611
    assert node is not None
2612

    
2613
    instance_list = self.cfg.GetInstanceList()
2614

    
2615
    masternode = self.cfg.GetMasterNode()
2616
    if node.name == masternode:
2617
      raise errors.OpPrereqError("Node is the master node,"
2618
                                 " you need to failover first.",
2619
                                 errors.ECODE_INVAL)
2620

    
2621
    for instance_name in instance_list:
2622
      instance = self.cfg.GetInstanceInfo(instance_name)
2623
      if node.name in instance.all_nodes:
2624
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2625
                                   " please remove first." % instance_name,
2626
                                   errors.ECODE_INVAL)
2627
    self.op.node_name = node.name
2628
    self.node = node
2629

    
2630
  def Exec(self, feedback_fn):
2631
    """Removes the node from the cluster.
2632

2633
    """
2634
    node = self.node
2635
    logging.info("Stopping the node daemon and removing configs from node %s",
2636
                 node.name)
2637

    
2638
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2639

    
2640
    # Promote nodes to master candidate as needed
2641
    _AdjustCandidatePool(self, exceptions=[node.name])
2642
    self.context.RemoveNode(node.name)
2643

    
2644
    # Run post hooks on the node before it's removed
2645
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2646
    try:
2647
      hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2648
    except:
2649
      # pylint: disable-msg=W0702
2650
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2651

    
2652
    result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2653
    msg = result.fail_msg
2654
    if msg:
2655
      self.LogWarning("Errors encountered on the remote node while leaving"
2656
                      " the cluster: %s", msg)
2657

    
2658

    
2659
class LUQueryNodes(NoHooksLU):
2660
  """Logical unit for querying nodes.
2661

2662
  """
2663
  # pylint: disable-msg=W0142
2664
  _OP_REQP = ["output_fields", "names", "use_locking"]
2665
  REQ_BGL = False
2666

    
2667
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2668
                    "master_candidate", "offline", "drained"]
2669

    
2670
  _FIELDS_DYNAMIC = utils.FieldSet(
2671
    "dtotal", "dfree",
2672
    "mtotal", "mnode", "mfree",
2673
    "bootid",
2674
    "ctotal", "cnodes", "csockets",
2675
    )
2676

    
2677
  _FIELDS_STATIC = utils.FieldSet(*[
2678
    "pinst_cnt", "sinst_cnt",
2679
    "pinst_list", "sinst_list",
2680
    "pip", "sip", "tags",
2681
    "master",
2682
    "role"] + _SIMPLE_FIELDS
2683
    )
2684

    
2685
  def ExpandNames(self):
2686
    _CheckOutputFields(static=self._FIELDS_STATIC,
2687
                       dynamic=self._FIELDS_DYNAMIC,
2688
                       selected=self.op.output_fields)
2689

    
2690
    self.needed_locks = {}
2691
    self.share_locks[locking.LEVEL_NODE] = 1
2692

    
2693
    if self.op.names:
2694
      self.wanted = _GetWantedNodes(self, self.op.names)
2695
    else:
2696
      self.wanted = locking.ALL_SET
2697

    
2698
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2699
    self.do_locking = self.do_node_query and self.op.use_locking
2700
    if self.do_locking:
2701
      # if we don't request only static fields, we need to lock the nodes
2702
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2703

    
2704
  def CheckPrereq(self):
2705
    """Check prerequisites.
2706

2707
    """
2708
    # The validation of the node list is done in the _GetWantedNodes,
2709
    # if non empty, and if empty, there's no validation to do
2710
    pass
2711

    
2712
  def Exec(self, feedback_fn):
2713
    """Computes the list of nodes and their attributes.
2714

2715
    """
2716
    all_info = self.cfg.GetAllNodesInfo()
2717
    if self.do_locking:
2718
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2719
    elif self.wanted != locking.ALL_SET:
2720
      nodenames = self.wanted
2721
      missing = set(nodenames).difference(all_info.keys())
2722
      if missing:
2723
        raise errors.OpExecError(
2724
          "Some nodes were removed before retrieving their data: %s" % missing)
2725
    else:
2726
      nodenames = all_info.keys()
2727

    
2728
    nodenames = utils.NiceSort(nodenames)
2729
    nodelist = [all_info[name] for name in nodenames]
2730

    
2731
    # begin data gathering
2732

    
2733
    if self.do_node_query:
2734
      live_data = {}
2735
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2736
                                          self.cfg.GetHypervisorType())
2737
      for name in nodenames:
2738
        nodeinfo = node_data[name]
2739
        if not nodeinfo.fail_msg and nodeinfo.payload:
2740
          nodeinfo = nodeinfo.payload
2741
          fn = utils.TryConvert
2742
          live_data[name] = {
2743
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2744
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2745
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2746
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2747
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2748
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2749
            "bootid": nodeinfo.get('bootid', None),
2750
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2751
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2752
            }
2753
        else:
2754
          live_data[name] = {}
2755
    else:
2756
      live_data = dict.fromkeys(nodenames, {})
2757

    
2758
    node_to_primary = dict([(name, set()) for name in nodenames])
2759
    node_to_secondary = dict([(name, set()) for name in nodenames])
2760

    
2761
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2762
                             "sinst_cnt", "sinst_list"))
2763
    if inst_fields & frozenset(self.op.output_fields):
2764
      inst_data = self.cfg.GetAllInstancesInfo()
2765

    
2766
      for inst in inst_data.values():
2767
        if inst.primary_node in node_to_primary:
2768
          node_to_primary[inst.primary_node].add(inst.name)
2769
        for secnode in inst.secondary_nodes:
2770
          if secnode in node_to_secondary:
2771
            node_to_secondary[secnode].add(inst.name)
2772

    
2773
    master_node = self.cfg.GetMasterNode()
2774

    
2775
    # end data gathering
2776

    
2777
    output = []
2778
    for node in nodelist:
2779
      node_output = []
2780
      for field in self.op.output_fields:
2781
        if field in self._SIMPLE_FIELDS:
2782
          val = getattr(node, field)
2783
        elif field == "pinst_list":
2784
          val = list(node_to_primary[node.name])
2785
        elif field == "sinst_list":
2786
          val = list(node_to_secondary[node.name])
2787
        elif field == "pinst_cnt":
2788
          val = len(node_to_primary[node.name])
2789
        elif field == "sinst_cnt":
2790
          val = len(node_to_secondary[node.name])
2791
        elif field == "pip":
2792
          val = node.primary_ip
2793
        elif field == "sip":
2794
          val = node.secondary_ip
2795
        elif field == "tags":
2796
          val = list(node.GetTags())
2797
        elif field == "master":
2798
          val = node.name == master_node
2799
        elif self._FIELDS_DYNAMIC.Matches(field):
2800
          val = live_data[node.name].get(field, None)
2801
        elif field == "role":
2802
          if node.name == master_node:
2803
            val = "M"
2804
          elif node.master_candidate:
2805
            val = "C"
2806
          elif node.drained:
2807
            val = "D"
2808
          elif node.offline:
2809
            val = "O"
2810
          else:
2811
            val = "R"
2812
        else:
2813
          raise errors.ParameterError(field)
2814
        node_output.append(val)
2815
      output.append(node_output)
2816

    
2817
    return output
2818

    
2819

    
2820
class LUQueryNodeVolumes(NoHooksLU):
2821
  """Logical unit for getting volumes on node(s).
2822

2823
  """
2824
  _OP_REQP = ["nodes", "output_fields"]
2825
  REQ_BGL = False
2826
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2827
  _FIELDS_STATIC = utils.FieldSet("node")
2828

    
2829
  def ExpandNames(self):
2830
    _CheckOutputFields(static=self._FIELDS_STATIC,
2831
                       dynamic=self._FIELDS_DYNAMIC,
2832
                       selected=self.op.output_fields)
2833

    
2834
    self.needed_locks = {}
2835
    self.share_locks[locking.LEVEL_NODE] = 1
2836
    if not self.op.nodes:
2837
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2838
    else:
2839
      self.needed_locks[locking.LEVEL_NODE] = \
2840
        _GetWantedNodes(self, self.op.nodes)
2841

    
2842
  def CheckPrereq(self):
2843
    """Check prerequisites.
2844

2845
    This checks that the fields required are valid output fields.
2846

2847
    """
2848
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2849

    
2850
  def Exec(self, feedback_fn):
2851
    """Computes the list of nodes and their attributes.
2852

2853
    """
2854
    nodenames = self.nodes
2855
    volumes = self.rpc.call_node_volumes(nodenames)
2856

    
2857
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2858
             in self.cfg.GetInstanceList()]
2859

    
2860
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2861

    
2862
    output = []
2863
    for node in nodenames:
2864
      nresult = volumes[node]
2865
      if nresult.offline:
2866
        continue
2867
      msg = nresult.fail_msg
2868
      if msg:
2869
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2870
        continue
2871

    
2872
      node_vols = nresult.payload[:]
2873
      node_vols.sort(key=lambda vol: vol['dev'])
2874

    
2875
      for vol in node_vols:
2876
        node_output = []
2877
        for field in self.op.output_fields:
2878
          if field == "node":
2879
            val = node
2880
          elif field == "phys":
2881
            val = vol['dev']
2882
          elif field == "vg":
2883
            val = vol['vg']
2884
          elif field == "name":
2885
            val = vol['name']
2886
          elif field == "size":
2887
            val = int(float(vol['size']))
2888
          elif field == "instance":
2889
            for inst in ilist:
2890
              if node not in lv_by_node[inst]:
2891
                continue
2892
              if vol['name'] in lv_by_node[inst][node]:
2893
                val = inst.name
2894
                break
2895
            else:
2896
              val = '-'
2897
          else:
2898
            raise errors.ParameterError(field)
2899
          node_output.append(str(val))
2900

    
2901
        output.append(node_output)
2902

    
2903
    return output
2904

    
2905

    
2906
class LUQueryNodeStorage(NoHooksLU):
2907
  """Logical unit for getting information on storage units on node(s).
2908

2909
  """
2910
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2911
  REQ_BGL = False
2912
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
2913

    
2914
  def ExpandNames(self):
2915
    storage_type = self.op.storage_type
2916

    
2917
    if storage_type not in constants.VALID_STORAGE_TYPES:
2918
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
2919
                                 errors.ECODE_INVAL)
2920

    
2921
    _CheckOutputFields(static=self._FIELDS_STATIC,
2922
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
2923
                       selected=self.op.output_fields)
2924

    
2925
    self.needed_locks = {}
2926
    self.share_locks[locking.LEVEL_NODE] = 1
2927

    
2928
    if self.op.nodes:
2929
      self.needed_locks[locking.LEVEL_NODE] = \
2930
        _GetWantedNodes(self, self.op.nodes)
2931
    else:
2932
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2933

    
2934
  def CheckPrereq(self):
2935
    """Check prerequisites.
2936

2937
    This checks that the fields required are valid output fields.
2938

2939
    """
2940
    self.op.name = getattr(self.op, "name", None)
2941

    
2942
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2943

    
2944
  def Exec(self, feedback_fn):
2945
    """Computes the list of nodes and their attributes.
2946

2947
    """
2948
    # Always get name to sort by
2949
    if constants.SF_NAME in self.op.output_fields:
2950
      fields = self.op.output_fields[:]
2951
    else:
2952
      fields = [constants.SF_NAME] + self.op.output_fields
2953

    
2954
    # Never ask for node or type as it's only known to the LU
2955
    for extra in [constants.SF_NODE, constants.SF_TYPE]:
2956
      while extra in fields:
2957
        fields.remove(extra)
2958

    
2959
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2960
    name_idx = field_idx[constants.SF_NAME]
2961

    
2962
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2963
    data = self.rpc.call_storage_list(self.nodes,
2964
                                      self.op.storage_type, st_args,
2965
                                      self.op.name, fields)
2966

    
2967
    result = []
2968

    
2969
    for node in utils.NiceSort(self.nodes):
2970
      nresult = data[node]
2971
      if nresult.offline:
2972
        continue
2973

    
2974
      msg = nresult.fail_msg
2975
      if msg:
2976
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2977
        continue
2978

    
2979
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2980

    
2981
      for name in utils.NiceSort(rows.keys()):
2982
        row = rows[name]
2983

    
2984
        out = []
2985

    
2986
        for field in self.op.output_fields:
2987
          if field == constants.SF_NODE:
2988
            val = node
2989
          elif field == constants.SF_TYPE:
2990
            val = self.op.storage_type
2991
          elif field in field_idx:
2992
            val = row[field_idx[field]]
2993
          else:
2994
            raise errors.ParameterError(field)
2995

    
2996
          out.append(val)
2997

    
2998
        result.append(out)
2999

    
3000
    return result
3001

    
3002

    
3003
class LUModifyNodeStorage(NoHooksLU):
3004
  """Logical unit for modifying a storage volume on a node.
3005

3006
  """
3007
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3008
  REQ_BGL = False
3009

    
3010
  def CheckArguments(self):
3011
    self.opnode_name = _ExpandNodeName(self.cfg, self.op.node_name)
3012

    
3013
    storage_type = self.op.storage_type
3014
    if storage_type not in constants.VALID_STORAGE_TYPES:
3015
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
3016
                                 errors.ECODE_INVAL)
3017

    
3018
  def ExpandNames(self):
3019
    self.needed_locks = {
3020
      locking.LEVEL_NODE: self.op.node_name,
3021
      }
3022

    
3023
  def CheckPrereq(self):
3024
    """Check prerequisites.
3025

3026
    """
3027
    storage_type = self.op.storage_type
3028

    
3029
    try:
3030
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3031
    except KeyError:
3032
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
3033
                                 " modified" % storage_type,
3034
                                 errors.ECODE_INVAL)
3035

    
3036
    diff = set(self.op.changes.keys()) - modifiable
3037
    if diff:
3038
      raise errors.OpPrereqError("The following fields can not be modified for"
3039
                                 " storage units of type '%s': %r" %
3040
                                 (storage_type, list(diff)),
3041
                                 errors.ECODE_INVAL)
3042

    
3043
  def Exec(self, feedback_fn):
3044
    """Computes the list of nodes and their attributes.
3045

3046
    """
3047
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3048
    result = self.rpc.call_storage_modify(self.op.node_name,
3049
                                          self.op.storage_type, st_args,
3050
                                          self.op.name, self.op.changes)
3051
    result.Raise("Failed to modify storage unit '%s' on %s" %
3052
                 (self.op.name, self.op.node_name))
3053

    
3054

    
3055
class LUAddNode(LogicalUnit):
3056
  """Logical unit for adding node to the cluster.
3057

3058
  """
3059
  HPATH = "node-add"
3060
  HTYPE = constants.HTYPE_NODE
3061
  _OP_REQP = ["node_name"]
3062

    
3063
  def CheckArguments(self):
3064
    # validate/normalize the node name
3065
    self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3066

    
3067
  def BuildHooksEnv(self):
3068
    """Build hooks env.
3069

3070
    This will run on all nodes before, and on all nodes + the new node after.
3071

3072
    """
3073
    env = {
3074
      "OP_TARGET": self.op.node_name,
3075
      "NODE_NAME": self.op.node_name,
3076
      "NODE_PIP": self.op.primary_ip,
3077
      "NODE_SIP": self.op.secondary_ip,
3078
      }
3079
    nodes_0 = self.cfg.GetNodeList()
3080
    nodes_1 = nodes_0 + [self.op.node_name, ]
3081
    return env, nodes_0, nodes_1
3082

    
3083
  def CheckPrereq(self):
3084
    """Check prerequisites.
3085

3086
    This checks:
3087
     - the new node is not already in the config
3088
     - it is resolvable
3089
     - its parameters (single/dual homed) matches the cluster
3090

3091
    Any errors are signaled by raising errors.OpPrereqError.
3092

3093
    """
3094
    node_name = self.op.node_name
3095
    cfg = self.cfg
3096

    
3097
    dns_data = utils.GetHostInfo(node_name)
3098

    
3099
    node = dns_data.name
3100
    primary_ip = self.op.primary_ip = dns_data.ip
3101
    secondary_ip = getattr(self.op, "secondary_ip", None)
3102
    if secondary_ip is None:
3103
      secondary_ip = primary_ip
3104
    if not utils.IsValidIP(secondary_ip):
3105
      raise errors.OpPrereqError("Invalid secondary IP given",
3106
                                 errors.ECODE_INVAL)
3107
    self.op.secondary_ip = secondary_ip
3108

    
3109
    node_list = cfg.GetNodeList()
3110
    if not self.op.readd and node in node_list:
3111
      raise errors.OpPrereqError("Node %s is already in the configuration" %
3112
                                 node, errors.ECODE_EXISTS)
3113
    elif self.op.readd and node not in node_list:
3114
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3115
                                 errors.ECODE_NOENT)
3116

    
3117
    for existing_node_name in node_list:
3118
      existing_node = cfg.GetNodeInfo(existing_node_name)
3119

    
3120
      if self.op.readd and node == existing_node_name:
3121
        if (existing_node.primary_ip != primary_ip or
3122
            existing_node.secondary_ip != secondary_ip):
3123
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
3124
                                     " address configuration as before",
3125
                                     errors.ECODE_INVAL)
3126
        continue
3127

    
3128
      if (existing_node.primary_ip == primary_ip or
3129
          existing_node.secondary_ip == primary_ip or
3130
          existing_node.primary_ip == secondary_ip or
3131
          existing_node.secondary_ip == secondary_ip):
3132
        raise errors.OpPrereqError("New node ip address(es) conflict with"
3133
                                   " existing node %s" % existing_node.name,
3134
                                   errors.ECODE_NOTUNIQUE)
3135

    
3136
    # check that the type of the node (single versus dual homed) is the
3137
    # same as for the master
3138
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3139
    master_singlehomed = myself.secondary_ip == myself.primary_ip
3140
    newbie_singlehomed = secondary_ip == primary_ip
3141
    if master_singlehomed != newbie_singlehomed:
3142
      if master_singlehomed:
3143
        raise errors.OpPrereqError("The master has no private ip but the"
3144
                                   " new node has one",
3145
                                   errors.ECODE_INVAL)
3146
      else:
3147
        raise errors.OpPrereqError("The master has a private ip but the"
3148
                                   " new node doesn't have one",
3149
                                   errors.ECODE_INVAL)
3150

    
3151
    # checks reachability
3152
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3153
      raise errors.OpPrereqError("Node not reachable by ping",
3154
                                 errors.ECODE_ENVIRON)
3155

    
3156
    if not newbie_singlehomed:
3157
      # check reachability from my secondary ip to newbie's secondary ip
3158
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3159
                           source=myself.secondary_ip):
3160
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3161
                                   " based ping to noded port",
3162
                                   errors.ECODE_ENVIRON)
3163

    
3164
    if self.op.readd:
3165
      exceptions = [node]
3166
    else:
3167
      exceptions = []
3168

    
3169
    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3170

    
3171
    if self.op.readd:
3172
      self.new_node = self.cfg.GetNodeInfo(node)
3173
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
3174
    else:
3175
      self.new_node = objects.Node(name=node,
3176
                                   primary_ip=primary_ip,
3177
                                   secondary_ip=secondary_ip,
3178
                                   master_candidate=self.master_candidate,
3179
                                   offline=False, drained=False)
3180

    
3181
  def Exec(self, feedback_fn):
3182
    """Adds the new node to the cluster.
3183

3184
    """
3185
    new_node = self.new_node
3186
    node = new_node.name
3187

    
3188
    # for re-adds, reset the offline/drained/master-candidate flags;
3189
    # we need to reset here, otherwise offline would prevent RPC calls
3190
    # later in the procedure; this also means that if the re-add
3191
    # fails, we are left with a non-offlined, broken node
3192
    if self.op.readd:
3193
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3194
      self.LogInfo("Readding a node, the offline/drained flags were reset")
3195
      # if we demote the node, we do cleanup later in the procedure
3196
      new_node.master_candidate = self.master_candidate
3197

    
3198
    # notify the user about any possible mc promotion
3199
    if new_node.master_candidate:
3200
      self.LogInfo("Node will be a master candidate")
3201

    
3202
    # check connectivity
3203
    result = self.rpc.call_version([node])[node]
3204
    result.Raise("Can't get version information from node %s" % node)
3205
    if constants.PROTOCOL_VERSION == result.payload:
3206
      logging.info("Communication to node %s fine, sw version %s match",
3207
                   node, result.payload)
3208
    else:
3209
      raise errors.OpExecError("Version mismatch master version %s,"
3210
                               " node version %s" %
3211
                               (constants.PROTOCOL_VERSION, result.payload))
3212

    
3213
    # setup ssh on node
3214
    if self.cfg.GetClusterInfo().modify_ssh_setup:
3215
      logging.info("Copy ssh key to node %s", node)
3216
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3217
      keyarray = []
3218
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3219
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3220
                  priv_key, pub_key]
3221

    
3222
      for i in keyfiles:
3223
        keyarray.append(utils.ReadFile(i))
3224

    
3225
      result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3226
                                      keyarray[2], keyarray[3], keyarray[4],
3227
                                      keyarray[5])
3228
      result.Raise("Cannot transfer ssh keys to the new node")
3229

    
3230
    # Add node to our /etc/hosts, and add key to known_hosts
3231
    if self.cfg.GetClusterInfo().modify_etc_hosts:
3232
      utils.AddHostToEtcHosts(new_node.name)
3233

    
3234
    if new_node.secondary_ip != new_node.primary_ip:
3235
      result = self.rpc.call_node_has_ip_address(new_node.name,
3236
                                                 new_node.secondary_ip)
3237
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3238
                   prereq=True, ecode=errors.ECODE_ENVIRON)
3239
      if not result.payload:
3240
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3241
                                 " you gave (%s). Please fix and re-run this"
3242
                                 " command." % new_node.secondary_ip)
3243

    
3244
    node_verify_list = [self.cfg.GetMasterNode()]
3245
    node_verify_param = {
3246
      constants.NV_NODELIST: [node],
3247
      # TODO: do a node-net-test as well?
3248
    }
3249

    
3250
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3251
                                       self.cfg.GetClusterName())
3252
    for verifier in node_verify_list:
3253
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
3254
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
3255
      if nl_payload:
3256
        for failed in nl_payload:
3257
          feedback_fn("ssh/hostname verification failed"
3258
                      " (checking from %s): %s" %
3259
                      (verifier, nl_payload[failed]))
3260
        raise errors.OpExecError("ssh/hostname verification failed.")
3261

    
3262
    if self.op.readd:
3263
      _RedistributeAncillaryFiles(self)
3264
      self.context.ReaddNode(new_node)
3265
      # make sure we redistribute the config
3266
      self.cfg.Update(new_node, feedback_fn)
3267
      # and make sure the new node will not have old files around
3268
      if not new_node.master_candidate:
3269
        result = self.rpc.call_node_demote_from_mc(new_node.name)
3270
        msg = result.fail_msg
3271
        if msg:
3272
          self.LogWarning("Node failed to demote itself from master"
3273
                          " candidate status: %s" % msg)
3274
    else:
3275
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
3276
      self.context.AddNode(new_node, self.proc.GetECId())
3277

    
3278

    
3279
class LUSetNodeParams(LogicalUnit):
3280
  """Modifies the parameters of a node.
3281

3282
  """
3283
  HPATH = "node-modify"
3284
  HTYPE = constants.HTYPE_NODE
3285
  _OP_REQP = ["node_name"]
3286
  REQ_BGL = False
3287

    
3288
  def CheckArguments(self):
3289
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3290
    _CheckBooleanOpField(self.op, 'master_candidate')
3291
    _CheckBooleanOpField(self.op, 'offline')
3292
    _CheckBooleanOpField(self.op, 'drained')
3293
    _CheckBooleanOpField(self.op, 'auto_promote')
3294
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3295
    if all_mods.count(None) == 3:
3296
      raise errors.OpPrereqError("Please pass at least one modification",
3297
                                 errors.ECODE_INVAL)
3298
    if all_mods.count(True) > 1:
3299
      raise errors.OpPrereqError("Can't set the node into more than one"
3300
                                 " state at the same time",
3301
                                 errors.ECODE_INVAL)
3302

    
3303
    # Boolean value that tells us whether we're offlining or draining the node
3304
    self.offline_or_drain = (self.op.offline == True or
3305
                             self.op.drained == True)
3306
    self.deoffline_or_drain = (self.op.offline == False or
3307
                               self.op.drained == False)
3308
    self.might_demote = (self.op.master_candidate == False or
3309
                         self.offline_or_drain)
3310

    
3311
    self.lock_all = self.op.auto_promote and self.might_demote
3312

    
3313

    
3314
  def ExpandNames(self):
3315
    if self.lock_all:
3316
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3317
    else:
3318
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3319

    
3320
  def BuildHooksEnv(self):
3321
    """Build hooks env.
3322

3323
    This runs on the master node.
3324

3325
    """
3326
    env = {
3327
      "OP_TARGET": self.op.node_name,
3328
      "MASTER_CANDIDATE": str(self.op.master_candidate),
3329
      "OFFLINE": str(self.op.offline),
3330
      "DRAINED": str(self.op.drained),
3331
      }
3332
    nl = [self.cfg.GetMasterNode(),
3333
          self.op.node_name]
3334
    return env, nl, nl
3335

    
3336
  def CheckPrereq(self):
3337
    """Check prerequisites.
3338

3339
    This only checks the instance list against the existing names.
3340

3341
    """
3342
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3343

    
3344
    if (self.op.master_candidate is not None or
3345
        self.op.drained is not None or
3346
        self.op.offline is not None):
3347
      # we can't change the master's node flags
3348
      if self.op.node_name == self.cfg.GetMasterNode():
3349
        raise errors.OpPrereqError("The master role can be changed"
3350
                                   " only via masterfailover",
3351
                                   errors.ECODE_INVAL)
3352

    
3353

    
3354
    if node.master_candidate and self.might_demote and not self.lock_all:
3355
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
3356
      # check if after removing the current node, we're missing master
3357
      # candidates
3358
      (mc_remaining, mc_should, _) = \
3359
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3360
      if mc_remaining < mc_should:
3361
        raise errors.OpPrereqError("Not enough master candidates, please"
3362
                                   " pass auto_promote to allow promotion",
3363
                                   errors.ECODE_INVAL)
3364

    
3365
    if (self.op.master_candidate == True and
3366
        ((node.offline and not self.op.offline == False) or
3367
         (node.drained and not self.op.drained == False))):
3368
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3369
                                 " to master_candidate" % node.name,
3370
                                 errors.ECODE_INVAL)
3371

    
3372
    # If we're being deofflined/drained, we'll MC ourself if needed
3373
    if (self.deoffline_or_drain and not self.offline_or_drain and not
3374
        self.op.master_candidate == True and not node.master_candidate):
3375
      self.op.master_candidate = _DecideSelfPromotion(self)
3376
      if self.op.master_candidate:
3377
        self.LogInfo("Autopromoting node to master candidate")
3378

    
3379
    return
3380

    
3381
  def Exec(self, feedback_fn):
3382
    """Modifies a node.
3383

3384
    """
3385
    node = self.node
3386

    
3387
    result = []
3388
    changed_mc = False
3389

    
3390
    if self.op.offline is not None:
3391
      node.offline = self.op.offline
3392
      result.append(("offline", str(self.op.offline)))
3393
      if self.op.offline == True:
3394
        if node.master_candidate:
3395
          node.master_candidate = False
3396
          changed_mc = True
3397
          result.append(("master_candidate", "auto-demotion due to offline"))
3398
        if node.drained:
3399
          node.drained = False
3400
          result.append(("drained", "clear drained status due to offline"))
3401

    
3402
    if self.op.master_candidate is not None:
3403
      node.master_candidate = self.op.master_candidate
3404
      changed_mc = True
3405
      result.append(("master_candidate", str(self.op.master_candidate)))
3406
      if self.op.master_candidate == False:
3407
        rrc = self.rpc.call_node_demote_from_mc(node.name)
3408
        msg = rrc.fail_msg
3409
        if msg:
3410
          self.LogWarning("Node failed to demote itself: %s" % msg)
3411

    
3412
    if self.op.drained is not None:
3413
      node.drained = self.op.drained
3414
      result.append(("drained", str(self.op.drained)))
3415
      if self.op.drained == True:
3416
        if node.master_candidate:
3417
          node.master_candidate = False
3418
          changed_mc = True
3419
          result.append(("master_candidate", "auto-demotion due to drain"))
3420
          rrc = self.rpc.call_node_demote_from_mc(node.name)
3421
          msg = rrc.fail_msg
3422
          if msg:
3423
            self.LogWarning("Node failed to demote itself: %s" % msg)
3424
        if node.offline:
3425
          node.offline = False
3426
          result.append(("offline", "clear offline status due to drain"))
3427

    
3428
    # we locked all nodes, we adjust the CP before updating this node
3429
    if self.lock_all:
3430
      _AdjustCandidatePool(self, [node.name])
3431

    
3432
    # this will trigger configuration file update, if needed
3433
    self.cfg.Update(node, feedback_fn)
3434

    
3435
    # this will trigger job queue propagation or cleanup
3436
    if changed_mc:
3437
      self.context.ReaddNode(node)
3438

    
3439
    return result
3440

    
3441

    
3442
class LUPowercycleNode(NoHooksLU):
3443
  """Powercycles a node.
3444

3445
  """
3446
  _OP_REQP = ["node_name", "force"]
3447
  REQ_BGL = False
3448

    
3449
  def CheckArguments(self):
3450
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3451
    if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3452
      raise errors.OpPrereqError("The node is the master and the force"
3453
                                 " parameter was not set",
3454
                                 errors.ECODE_INVAL)
3455

    
3456
  def ExpandNames(self):
3457
    """Locking for PowercycleNode.
3458

3459
    This is a last-resort option and shouldn't block on other
3460
    jobs. Therefore, we grab no locks.
3461

3462
    """
3463
    self.needed_locks = {}
3464

    
3465
  def CheckPrereq(self):
3466
    """Check prerequisites.
3467

3468
    This LU has no prereqs.
3469

3470
    """
3471
    pass
3472

    
3473
  def Exec(self, feedback_fn):
3474
    """Reboots a node.
3475

3476
    """
3477
    result = self.rpc.call_node_powercycle(self.op.node_name,
3478
                                           self.cfg.GetHypervisorType())
3479
    result.Raise("Failed to schedule the reboot")
3480
    return result.payload
3481

    
3482

    
3483
class LUQueryClusterInfo(NoHooksLU):
3484
  """Query cluster configuration.
3485

3486
  """
3487
  _OP_REQP = []
3488
  REQ_BGL = False
3489

    
3490
  def ExpandNames(self):
3491
    self.needed_locks = {}
3492

    
3493
  def CheckPrereq(self):
3494
    """No prerequsites needed for this LU.
3495

3496
    """
3497
    pass
3498

    
3499
  def Exec(self, feedback_fn):
3500
    """Return cluster config.
3501

3502
    """
3503
    cluster = self.cfg.GetClusterInfo()
3504
    os_hvp = {}
3505

    
3506
    # Filter just for enabled hypervisors
3507
    for os_name, hv_dict in cluster.os_hvp.items():
3508
      os_hvp[os_name] = {}
3509
      for hv_name, hv_params in hv_dict.items():
3510
        if hv_name in cluster.enabled_hypervisors:
3511
          os_hvp[os_name][hv_name] = hv_params
3512

    
3513
    result = {
3514
      "software_version": constants.RELEASE_VERSION,
3515
      "protocol_version": constants.PROTOCOL_VERSION,
3516
      "config_version": constants.CONFIG_VERSION,
3517
      "os_api_version": max(constants.OS_API_VERSIONS),
3518
      "export_version": constants.EXPORT_VERSION,
3519
      "architecture": (platform.architecture()[0], platform.machine()),
3520
      "name": cluster.cluster_name,
3521
      "master": cluster.master_node,
3522
      "default_hypervisor": cluster.enabled_hypervisors[0],
3523
      "enabled_hypervisors": cluster.enabled_hypervisors,
3524
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3525
                        for hypervisor_name in cluster.enabled_hypervisors]),
3526
      "os_hvp": os_hvp,
3527
      "beparams": cluster.beparams,
3528
      "nicparams": cluster.nicparams,
3529
      "candidate_pool_size": cluster.candidate_pool_size,
3530
      "master_netdev": cluster.master_netdev,
3531
      "volume_group_name": cluster.volume_group_name,
3532
      "file_storage_dir": cluster.file_storage_dir,
3533
      "ctime": cluster.ctime,
3534
      "mtime": cluster.mtime,
3535
      "uuid": cluster.uuid,
3536
      "tags": list(cluster.GetTags()),
3537
      }
3538

    
3539
    return result
3540

    
3541

    
3542
class LUQueryConfigValues(NoHooksLU):
3543
  """Return configuration values.
3544

3545
  """
3546
  _OP_REQP = []
3547
  REQ_BGL = False
3548
  _FIELDS_DYNAMIC = utils.FieldSet()
3549
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3550
                                  "watcher_pause")
3551

    
3552
  def ExpandNames(self):
3553
    self.needed_locks = {}
3554

    
3555
    _CheckOutputFields(static=self._FIELDS_STATIC,
3556
                       dynamic=self._FIELDS_DYNAMIC,
3557
                       selected=self.op.output_fields)
3558

    
3559
  def CheckPrereq(self):
3560
    """No prerequisites.
3561

3562
    """
3563
    pass
3564

    
3565
  def Exec(self, feedback_fn):
3566
    """Dump a representation of the cluster config to the standard output.
3567

3568
    """
3569
    values = []
3570
    for field in self.op.output_fields:
3571
      if field == "cluster_name":
3572
        entry = self.cfg.GetClusterName()
3573
      elif field == "master_node":
3574
        entry = self.cfg.GetMasterNode()
3575
      elif field == "drain_flag":
3576
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3577
      elif field == "watcher_pause":
3578
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3579
      else:
3580
        raise errors.ParameterError(field)
3581
      values.append(entry)
3582
    return values
3583

    
3584

    
3585
class LUActivateInstanceDisks(NoHooksLU):
3586
  """Bring up an instance's disks.
3587

3588
  """
3589
  _OP_REQP = ["instance_name"]
3590
  REQ_BGL = False
3591

    
3592
  def ExpandNames(self):
3593
    self._ExpandAndLockInstance()
3594
    self.needed_locks[locking.LEVEL_NODE] = []
3595
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3596

    
3597
  def DeclareLocks(self, level):
3598
    if level == locking.LEVEL_NODE:
3599
      self._LockInstancesNodes()
3600

    
3601
  def CheckPrereq(self):
3602
    """Check prerequisites.
3603

3604
    This checks that the instance is in the cluster.
3605

3606
    """
3607
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3608
    assert self.instance is not None, \
3609
      "Cannot retrieve locked instance %s" % self.op.instance_name
3610
    _CheckNodeOnline(self, self.instance.primary_node)
3611
    if not hasattr(self.op, "ignore_size"):
3612
      self.op.ignore_size = False
3613

    
3614
  def Exec(self, feedback_fn):
3615
    """Activate the disks.
3616

3617
    """
3618
    disks_ok, disks_info = \
3619
              _AssembleInstanceDisks(self, self.instance,
3620
                                     ignore_size=self.op.ignore_size)
3621
    if not disks_ok:
3622
      raise errors.OpExecError("Cannot activate block devices")
3623

    
3624
    return disks_info
3625

    
3626

    
3627
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3628
                           ignore_size=False):
3629
  """Prepare the block devices for an instance.
3630

3631
  This sets up the block devices on all nodes.
3632

3633
  @type lu: L{LogicalUnit}
3634
  @param lu: the logical unit on whose behalf we execute
3635
  @type instance: L{objects.Instance}
3636
  @param instance: the instance for whose disks we assemble
3637
  @type ignore_secondaries: boolean
3638
  @param ignore_secondaries: if true, errors on secondary nodes
3639
      won't result in an error return from the function
3640
  @type ignore_size: boolean
3641
  @param ignore_size: if true, the current known size of the disk
3642
      will not be used during the disk activation, useful for cases
3643
      when the size is wrong
3644
  @return: False if the operation failed, otherwise a list of
3645
      (host, instance_visible_name, node_visible_name)
3646
      with the mapping from node devices to instance devices
3647

3648
  """
3649
  device_info = []
3650
  disks_ok = True
3651
  iname = instance.name
3652
  # With the two passes mechanism we try to reduce the window of
3653
  # opportunity for the race condition of switching DRBD to primary
3654
  # before handshaking occured, but we do not eliminate it
3655

    
3656
  # The proper fix would be to wait (with some limits) until the
3657
  # connection has been made and drbd transitions from WFConnection
3658
  # into any other network-connected state (Connected, SyncTarget,
3659
  # SyncSource, etc.)
3660

    
3661
  # 1st pass, assemble on all nodes in secondary mode
3662
  for inst_disk in instance.disks:
3663
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3664
      if ignore_size:
3665
        node_disk = node_disk.Copy()
3666
        node_disk.UnsetSize()
3667
      lu.cfg.SetDiskID(node_disk, node)
3668
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3669
      msg = result.fail_msg
3670
      if msg:
3671
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3672
                           " (is_primary=False, pass=1): %s",
3673
                           inst_disk.iv_name, node, msg)
3674
        if not ignore_secondaries:
3675
          disks_ok = False
3676

    
3677
  # FIXME: race condition on drbd migration to primary
3678

    
3679
  # 2nd pass, do only the primary node
3680
  for inst_disk in instance.disks:
3681
    dev_path = None
3682

    
3683
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3684
      if node != instance.primary_node:
3685
        continue
3686
      if ignore_size:
3687
        node_disk = node_disk.Copy()
3688
        node_disk.UnsetSize()
3689
      lu.cfg.SetDiskID(node_disk, node)
3690
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3691
      msg = result.fail_msg
3692
      if msg:
3693
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3694
                           " (is_primary=True, pass=2): %s",
3695
                           inst_disk.iv_name, node, msg)
3696
        disks_ok = False
3697
      else:
3698
        dev_path = result.payload
3699

    
3700
    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
3701

    
3702
  # leave the disks configured for the primary node
3703
  # this is a workaround that would be fixed better by
3704
  # improving the logical/physical id handling
3705
  for disk in instance.disks:
3706
    lu.cfg.SetDiskID(disk, instance.primary_node)
3707

    
3708
  return disks_ok, device_info
3709

    
3710

    
3711
def _StartInstanceDisks(lu, instance, force):
3712
  """Start the disks of an instance.
3713

3714
  """
3715
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3716
                                           ignore_secondaries=force)
3717
  if not disks_ok:
3718
    _ShutdownInstanceDisks(lu, instance)
3719
    if force is not None and not force:
3720
      lu.proc.LogWarning("", hint="If the message above refers to a"
3721
                         " secondary node,"
3722
                         " you can retry the operation using '--force'.")
3723
    raise errors.OpExecError("Disk consistency error")
3724

    
3725

    
3726
class LUDeactivateInstanceDisks(NoHooksLU):
3727
  """Shutdown an instance's disks.
3728

3729
  """
3730
  _OP_REQP = ["instance_name"]
3731
  REQ_BGL = False
3732

    
3733
  def ExpandNames(self):
3734
    self._ExpandAndLockInstance()
3735
    self.needed_locks[locking.LEVEL_NODE] = []
3736
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3737

    
3738
  def DeclareLocks(self, level):
3739
    if level == locking.LEVEL_NODE:
3740
      self._LockInstancesNodes()
3741

    
3742
  def CheckPrereq(self):
3743
    """Check prerequisites.
3744

3745
    This checks that the instance is in the cluster.
3746

3747
    """
3748
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3749
    assert self.instance is not None, \
3750
      "Cannot retrieve locked instance %s" % self.op.instance_name
3751

    
3752
  def Exec(self, feedback_fn):
3753
    """Deactivate the disks
3754

3755
    """
3756
    instance = self.instance
3757
    _SafeShutdownInstanceDisks(self, instance)
3758

    
3759

    
3760
def _SafeShutdownInstanceDisks(lu, instance):
3761
  """Shutdown block devices of an instance.
3762

3763
  This function checks if an instance is running, before calling
3764
  _ShutdownInstanceDisks.
3765

3766
  """
3767
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
3768
  _ShutdownInstanceDisks(lu, instance)
3769

    
3770

    
3771
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3772
  """Shutdown block devices of an instance.
3773

3774
  This does the shutdown on all nodes of the instance.
3775

3776
  If the ignore_primary is false, errors on the primary node are
3777
  ignored.
3778

3779
  """
3780
  all_result = True
3781
  for disk in instance.disks:
3782
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3783
      lu.cfg.SetDiskID(top_disk, node)
3784
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3785
      msg = result.fail_msg
3786
      if msg:
3787
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3788
                      disk.iv_name, node, msg)
3789
        if not ignore_primary or node != instance.primary_node:
3790
          all_result = False
3791
  return all_result
3792

    
3793

    
3794
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3795
  """Checks if a node has enough free memory.
3796

3797
  This function check if a given node has the needed amount of free
3798
  memory. In case the node has less memory or we cannot get the
3799
  information from the node, this function raise an OpPrereqError
3800
  exception.
3801

3802
  @type lu: C{LogicalUnit}
3803
  @param lu: a logical unit from which we get configuration data
3804
  @type node: C{str}
3805
  @param node: the node to check
3806
  @type reason: C{str}
3807
  @param reason: string to use in the error message
3808
  @type requested: C{int}
3809
  @param requested: the amount of memory in MiB to check for
3810
  @type hypervisor_name: C{str}
3811
  @param hypervisor_name: the hypervisor to ask for memory stats
3812
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3813
      we cannot check the node
3814

3815
  """
3816
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3817
  nodeinfo[node].Raise("Can't get data from node %s" % node,
3818
                       prereq=True, ecode=errors.ECODE_ENVIRON)
3819
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3820
  if not isinstance(free_mem, int):
3821
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3822
                               " was '%s'" % (node, free_mem),
3823
                               errors.ECODE_ENVIRON)
3824
  if requested > free_mem:
3825
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3826
                               " needed %s MiB, available %s MiB" %
3827
                               (node, reason, requested, free_mem),
3828
                               errors.ECODE_NORES)
3829

    
3830

    
3831
def _CheckNodesFreeDisk(lu, nodenames, requested):
3832
  """Checks if nodes have enough free disk space in the default VG.
3833

3834
  This function check if all given nodes have the needed amount of
3835
  free disk. In case any node has less disk or we cannot get the
3836
  information from the node, this function raise an OpPrereqError
3837
  exception.
3838

3839
  @type lu: C{LogicalUnit}
3840
  @param lu: a logical unit from which we get configuration data
3841
  @type nodenames: C{list}
3842
  @param node: the list of node names to check
3843
  @type requested: C{int}
3844
  @param requested: the amount of disk in MiB to check for
3845
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
3846
      we cannot check the node
3847

3848
  """
3849
  nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
3850
                                   lu.cfg.GetHypervisorType())
3851
  for node in nodenames:
3852
    info = nodeinfo[node]
3853
    info.Raise("Cannot get current information from node %s" % node,
3854
               prereq=True, ecode=errors.ECODE_ENVIRON)
3855
    vg_free = info.payload.get("vg_free", None)
3856
    if not isinstance(vg_free, int):
3857
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
3858
                                 " result was '%s'" % (node, vg_free),
3859
                                 errors.ECODE_ENVIRON)
3860
    if requested > vg_free:
3861
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
3862
                                 " required %d MiB, available %d MiB" %
3863
                                 (node, requested, vg_free),
3864
                                 errors.ECODE_NORES)
3865

    
3866

    
3867
class LUStartupInstance(LogicalUnit):
3868
  """Starts an instance.
3869

3870
  """
3871
  HPATH = "instance-start"
3872
  HTYPE = constants.HTYPE_INSTANCE
3873
  _OP_REQP = ["instance_name", "force"]
3874
  REQ_BGL = False
3875

    
3876
  def ExpandNames(self):
3877
    self._ExpandAndLockInstance()
3878

    
3879
  def BuildHooksEnv(self):
3880
    """Build hooks env.
3881

3882
    This runs on master, primary and secondary nodes of the instance.
3883

3884
    """
3885
    env = {
3886
      "FORCE": self.op.force,
3887
      }
3888
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3889
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3890
    return env, nl, nl
3891

    
3892
  def CheckPrereq(self):
3893
    """Check prerequisites.
3894

3895
    This checks that the instance is in the cluster.
3896

3897
    """
3898
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3899
    assert self.instance is not None, \
3900
      "Cannot retrieve locked instance %s" % self.op.instance_name
3901

    
3902
    # extra beparams
3903
    self.beparams = getattr(self.op, "beparams", {})
3904
    if self.beparams:
3905
      if not isinstance(self.beparams, dict):
3906
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3907
                                   " dict" % (type(self.beparams), ),
3908
                                   errors.ECODE_INVAL)
3909
      # fill the beparams dict
3910
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3911
      self.op.beparams = self.beparams
3912

    
3913
    # extra hvparams
3914
    self.hvparams = getattr(self.op, "hvparams", {})
3915
    if self.hvparams:
3916
      if not isinstance(self.hvparams, dict):
3917
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3918
                                   " dict" % (type(self.hvparams), ),
3919
                                   errors.ECODE_INVAL)
3920

    
3921
      # check hypervisor parameter syntax (locally)
3922
      cluster = self.cfg.GetClusterInfo()
3923
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3924
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3925
                                    instance.hvparams)
3926
      filled_hvp.update(self.hvparams)
3927
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3928
      hv_type.CheckParameterSyntax(filled_hvp)
3929
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3930
      self.op.hvparams = self.hvparams
3931

    
3932
    _CheckNodeOnline(self, instance.primary_node)
3933

    
3934
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3935
    # check bridges existence
3936
    _CheckInstanceBridgesExist(self, instance)
3937

    
3938
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3939
                                              instance.name,
3940
                                              instance.hypervisor)
3941
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3942
                      prereq=True, ecode=errors.ECODE_ENVIRON)
3943
    if not remote_info.payload: # not running already
3944
      _CheckNodeFreeMemory(self, instance.primary_node,
3945
                           "starting instance %s" % instance.name,
3946
                           bep[constants.BE_MEMORY], instance.hypervisor)
3947

    
3948
  def Exec(self, feedback_fn):
3949
    """Start the instance.
3950

3951
    """
3952
    instance = self.instance
3953
    force = self.op.force
3954

    
3955
    self.cfg.MarkInstanceUp(instance.name)
3956

    
3957
    node_current = instance.primary_node
3958

    
3959
    _StartInstanceDisks(self, instance, force)
3960

    
3961
    result = self.rpc.call_instance_start(node_current, instance,
3962
                                          self.hvparams, self.beparams)
3963
    msg = result.fail_msg
3964
    if msg:
3965
      _ShutdownInstanceDisks(self, instance)
3966
      raise errors.OpExecError("Could not start instance: %s" % msg)
3967

    
3968

    
3969
class LURebootInstance(LogicalUnit):
3970
  """Reboot an instance.
3971

3972
  """
3973
  HPATH = "instance-reboot"
3974
  HTYPE = constants.HTYPE_INSTANCE
3975
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3976
  REQ_BGL = False
3977

    
3978
  def CheckArguments(self):
3979
    """Check the arguments.
3980

3981
    """
3982
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
3983
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
3984

    
3985
  def ExpandNames(self):
3986
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3987
                                   constants.INSTANCE_REBOOT_HARD,
3988
                                   constants.INSTANCE_REBOOT_FULL]:
3989
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3990
                                  (constants.INSTANCE_REBOOT_SOFT,
3991
                                   constants.INSTANCE_REBOOT_HARD,
3992
                                   constants.INSTANCE_REBOOT_FULL))
3993
    self._ExpandAndLockInstance()
3994

    
3995
  def BuildHooksEnv(self):
3996
    """Build hooks env.
3997

3998
    This runs on master, primary and secondary nodes of the instance.
3999

4000
    """
4001
    env = {
4002
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4003
      "REBOOT_TYPE": self.op.reboot_type,
4004
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4005
      }
4006
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4007
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4008
    return env, nl, nl
4009

    
4010
  def CheckPrereq(self):
4011
    """Check prerequisites.
4012

4013
    This checks that the instance is in the cluster.
4014

4015
    """
4016
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4017
    assert self.instance is not None, \
4018
      "Cannot retrieve locked instance %s" % self.op.instance_name
4019

    
4020
    _CheckNodeOnline(self, instance.primary_node)
4021

    
4022
    # check bridges existence
4023
    _CheckInstanceBridgesExist(self, instance)
4024

    
4025
  def Exec(self, feedback_fn):
4026
    """Reboot the instance.
4027

4028
    """
4029
    instance = self.instance
4030
    ignore_secondaries = self.op.ignore_secondaries
4031
    reboot_type = self.op.reboot_type
4032

    
4033
    node_current = instance.primary_node
4034

    
4035
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4036
                       constants.INSTANCE_REBOOT_HARD]:
4037
      for disk in instance.disks:
4038
        self.cfg.SetDiskID(disk, node_current)
4039
      result = self.rpc.call_instance_reboot(node_current, instance,
4040
                                             reboot_type,
4041
                                             self.shutdown_timeout)
4042
      result.Raise("Could not reboot instance")
4043
    else:
4044
      result = self.rpc.call_instance_shutdown(node_current, instance,
4045
                                               self.shutdown_timeout)
4046
      result.Raise("Could not shutdown instance for full reboot")
4047
      _ShutdownInstanceDisks(self, instance)
4048
      _StartInstanceDisks(self, instance, ignore_secondaries)
4049
      result = self.rpc.call_instance_start(node_current, instance, None, None)
4050
      msg = result.fail_msg
4051
      if msg:
4052
        _ShutdownInstanceDisks(self, instance)
4053
        raise errors.OpExecError("Could not start instance for"
4054
                                 " full reboot: %s" % msg)
4055

    
4056
    self.cfg.MarkInstanceUp(instance.name)
4057

    
4058

    
4059
class LUShutdownInstance(LogicalUnit):
4060
  """Shutdown an instance.
4061

4062
  """
4063
  HPATH = "instance-stop"
4064
  HTYPE = constants.HTYPE_INSTANCE
4065
  _OP_REQP = ["instance_name"]
4066
  REQ_BGL = False
4067

    
4068
  def CheckArguments(self):
4069
    """Check the arguments.
4070

4071
    """
4072
    self.timeout = getattr(self.op, "timeout",
4073
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)
4074

    
4075
  def ExpandNames(self):
4076
    self._ExpandAndLockInstance()
4077

    
4078
  def BuildHooksEnv(self):
4079
    """Build hooks env.
4080

4081
    This runs on master, primary and secondary nodes of the instance.
4082

4083
    """
4084
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4085
    env["TIMEOUT"] = self.timeout
4086
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4087
    return env, nl, nl
4088

    
4089
  def CheckPrereq(self):
4090
    """Check prerequisites.
4091

4092
    This checks that the instance is in the cluster.
4093

4094
    """
4095
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4096
    assert self.instance is not None, \
4097
      "Cannot retrieve locked instance %s" % self.op.instance_name
4098
    _CheckNodeOnline(self, self.instance.primary_node)
4099

    
4100
  def Exec(self, feedback_fn):
4101
    """Shutdown the instance.
4102

4103
    """
4104
    instance = self.instance
4105
    node_current = instance.primary_node
4106
    timeout = self.timeout
4107
    self.cfg.MarkInstanceDown(instance.name)
4108
    result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4109
    msg = result.fail_msg
4110
    if msg:
4111
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4112

    
4113
    _ShutdownInstanceDisks(self, instance)
4114

    
4115

    
4116
class LUReinstallInstance(LogicalUnit):
4117
  """Reinstall an instance.
4118

4119
  """
4120
  HPATH = "instance-reinstall"
4121
  HTYPE = constants.HTYPE_INSTANCE
4122
  _OP_REQP = ["instance_name"]
4123
  REQ_BGL = False
4124

    
4125
  def ExpandNames(self):
4126
    self._ExpandAndLockInstance()
4127

    
4128
  def BuildHooksEnv(self):
4129
    """Build hooks env.
4130

4131
    This runs on master, primary and secondary nodes of the instance.
4132

4133
    """
4134
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4135
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4136
    return env, nl, nl
4137

    
4138
  def CheckPrereq(self):
4139
    """Check prerequisites.
4140

4141
    This checks that the instance is in the cluster and is not running.
4142

4143
    """
4144
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4145
    assert instance is not None, \
4146
      "Cannot retrieve locked instance %s" % self.op.instance_name
4147
    _CheckNodeOnline(self, instance.primary_node)
4148

    
4149
    if instance.disk_template == constants.DT_DISKLESS:
4150
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4151
                                 self.op.instance_name,
4152
                                 errors.ECODE_INVAL)
4153
    _CheckInstanceDown(self, instance, "cannot reinstall")
4154

    
4155
    self.op.os_type = getattr(self.op, "os_type", None)
4156
    self.op.force_variant = getattr(self.op, "force_variant", False)
4157
    if self.op.os_type is not None:
4158
      # OS verification
4159
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4160
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4161

    
4162
    self.instance = instance
4163

    
4164
  def Exec(self, feedback_fn):
4165
    """Reinstall the instance.
4166

4167
    """
4168
    inst = self.instance
4169

    
4170
    if self.op.os_type is not None:
4171
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4172
      inst.os = self.op.os_type
4173
      self.cfg.Update(inst, feedback_fn)
4174

    
4175
    _StartInstanceDisks(self, inst, None)
4176
    try:
4177
      feedback_fn("Running the instance OS create scripts...")
4178
      # FIXME: pass debug option from opcode to backend
4179
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4180
                                             self.op.debug_level)
4181
      result.Raise("Could not install OS for instance %s on node %s" %
4182
                   (inst.name, inst.primary_node))
4183
    finally:
4184
      _ShutdownInstanceDisks(self, inst)
4185

    
4186

    
4187
class LURecreateInstanceDisks(LogicalUnit):
4188
  """Recreate an instance's missing disks.
4189

4190
  """
4191
  HPATH = "instance-recreate-disks"
4192
  HTYPE = constants.HTYPE_INSTANCE
4193
  _OP_REQP = ["instance_name", "disks"]
4194
  REQ_BGL = False
4195

    
4196
  def CheckArguments(self):
4197
    """Check the arguments.
4198

4199
    """
4200
    if not isinstance(self.op.disks, list):
4201
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4202
    for item in self.op.disks:
4203
      if (not isinstance(item, int) or
4204
          item < 0):
4205
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
4206
                                   str(item), errors.ECODE_INVAL)
4207

    
4208
  def ExpandNames(self):
4209
    self._ExpandAndLockInstance()
4210

    
4211
  def BuildHooksEnv(self):
4212
    """Build hooks env.
4213

4214
    This runs on master, primary and secondary nodes of the instance.
4215

4216
    """
4217
    env = _BuildInstanceHookEnvByObject(self, self.instance)
4218
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4219
    return env, nl, nl
4220

    
4221
  def CheckPrereq(self):
4222
    """Check prerequisites.
4223

4224
    This checks that the instance is in the cluster and is not running.
4225

4226
    """
4227
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4228
    assert instance is not None, \
4229
      "Cannot retrieve locked instance %s" % self.op.instance_name
4230
    _CheckNodeOnline(self, instance.primary_node)
4231

    
4232
    if instance.disk_template == constants.DT_DISKLESS:
4233
      raise errors.OpPrereqError("Instance '%s' has no disks" %
4234
                                 self.op.instance_name, errors.ECODE_INVAL)
4235
    _CheckInstanceDown(self, instance, "cannot recreate disks")
4236

    
4237
    if not self.op.disks:
4238
      self.op.disks = range(len(instance.disks))
4239
    else:
4240
      for idx in self.op.disks:
4241
        if idx >= len(instance.disks):
4242
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4243
                                     errors.ECODE_INVAL)
4244

    
4245
    self.instance = instance
4246

    
4247
  def Exec(self, feedback_fn):
4248
    """Recreate the disks.
4249

4250
    """
4251
    to_skip = []
4252
    for idx, _ in enumerate(self.instance.disks):
4253
      if idx not in self.op.disks: # disk idx has not been passed in
4254
        to_skip.append(idx)
4255
        continue
4256

    
4257
    _CreateDisks(self, self.instance, to_skip=to_skip)
4258

    
4259

    
4260
class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None
    _CheckNodeOnline(self, instance.primary_node)
    _CheckInstanceDown(self, instance, "cannot rename")
    self.instance = instance

    # new name verification
    name_info = utils.GetHostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name),
                                   errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name, self.op.debug_level)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    nl = [self.cfg.GetMasterNode()]
    nl_post = list(self.instance.all_nodes) + nl
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


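# Removal order above: shut the instance down (honoring shutdown_timeout),
# remove its block devices via _RemoveDisks, and only then drop it from the
# cluster configuration and release the instance lock.  With
# ignore_failures set, the first two steps degrade to warnings.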
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


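# Field name examples accepted by the query above: plain fields such as
# "name" or "oper_state", parameterized fields such as "disk.size/0" or
# "nic.mac/1" (matched by the regular expressions in _FIELDS_STATIC), and
# prefixed parameters such as "be/memory" or "hv/kernel_path" (assuming
# those parameter names exist in the cluster's BES_/HVS_PARAMETERS).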
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "OLD_SECONDARY": target_node,
      "NEW_PRIMARY": target_node,
      "NEW_SECONDARY": source_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if instance.admin_up:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        if not _CheckDiskConsistency(self, dev, target_node, False):
          if not self.op.ignore_consistency:
            raise errors.OpExecError("Disk %s is degraded on target node,"
                                     " aborting failover." % dev.iv_name)
    else:
      feedback_fn("* not checking disk consistency as instance is not running")

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


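# Failover sequence implemented above: optionally verify disk consistency on
# the secondary, shut the instance down on the old primary, deactivate its
# disks there, flip primary_node in the configuration, and, if the instance
# was marked up, reassemble the disks and start it on the new primary.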
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    env.update({
        "OLD_PRIMARY": source_node,
        "OLD_SECONDARY": target_node,
        "NEW_PRIMARY": target_node,
        "NEW_SECONDARY": source_node,
        })
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    nl_post = list(nl)
    nl_post.append(source_node)
    return env, nl, nl_post


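# LUMigrateInstance itself only handles locking and hooks; the actual work is
# delegated to the TLMigrateInstance tasklet created in ExpandNames, which is
# defined further below.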
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


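# LUMigrateNode builds one TLMigrateInstance tasklet per primary instance of
# the node, so migrating a node is just a sequence of the same per-instance
# migrations driven by the tasklet below.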
class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover",
                   prereq=True, ecode=errors.ECODE_STATE)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance, self.feedback_fn)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s",
                    target_node, abort_msg)
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Pre-migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self.feedback_fn("Migration failed, aborting")
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, self.feedback_fn)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s", msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


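# DRBD live-migration sequence used by _ExecMigration above: demote the
# target to secondary, go standalone, reconnect in dual-master mode, wait for
# resync, let the hypervisor migrate the instance, then demote the old
# primary, resync, go standalone again and reconnect single-master.
# _ExecCleanup replays the tail of this sequence to recover from a migration
# that was interrupted midway.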
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


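# A DRBD8 disk produced by _GenerateDRBD8Branch is a three-node tree: the
# DRBD device on top and two LVs (the data volume plus a 128 MiB metadata
# volume) as its children.  Its logical_id packs everything needed to
# assemble it, e.g. (hypothetical values):
# ("node1", "node2", 11000, 0, 1, "<shared secret>").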
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


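# Editor's note (illustrative, not part of the original source): for the DRBD8
# template, _GenerateDiskTemplate above creates two LVs per disk, named
# "<prefix>_data" and "<prefix>_meta", with the prefixes coming from
# _GenerateUniqueNames(lu, [".disk0", ".disk1", ...]); every generated disk,
# regardless of template, gets an iv_name of the form "disk/<index>".
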
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


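# Illustrative example (not in the original source): for an instance named
# "inst1.example.com", _GetInstanceInfoText returns
# "originstname+inst1.example.com".
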
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @raise errors.OpExecError: if any disk cannot be created

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


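# Editor's note (illustrative, not part of the original source): _CreateDisks
# above first creates the file storage directory (file-based instances only)
# and then loops over instance.disks, skipping any index listed in to_skip;
# f_create is True only on the primary node (or the overriding target_node).
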
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


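# Illustrative example (not in the original source): for two disks of 1024 and
# 2048 MiB, _ComputeDiskSize returns 3072 for DT_PLAIN and
# (1024 + 128) + (2048 + 128) = 3328 for DT_DRBD8, while DT_DISKLESS and
# DT_FILE yield None (no volume-group space is needed).
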
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    """
    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if not hasattr(self.op, "no_install"):
      self.op.no_install = False
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)
    if (self.op.disk_template == constants.DT_FILE and
        not constants.ENABLE_FILE_STORAGE):
      raise errors.OpPrereqError("File storage disabled at configure time",
                                 errors.ECODE_INVAL)
    # check disk information: either all adopt, or no adopt
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      if "adopt" in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      if self.op.disk_template != constants.DT_PLAIN:
        raise errors.OpPrereqError("Disk adoption is only supported for the"
                                   " 'plain' disk template",
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

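  # Editor's note (illustrative, not part of the original source): per the
  # checks above, a disk specification such as {"size": 1024, "adopt": "myvol"}
  # (volume name hypothetical) must be used either for all disks or for none,
  # and adoption is only accepted with the 'plain' disk template, without an
  # iallocator and not in import mode.
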
  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # disk template and mirror node verification
    _CheckDiskTemplate(self.op.disk_template)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    if self.op.name_check:
      hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = instance_name = hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = hostname1.ip
    else:
      instance_name = self.op.instance_name
      self.check_ip = None

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      new_disk = {"size": size, "mode": mode}
      if "adopt" in disk:
        new_disk["adopt"] = disk["adopt"]
      self.disks.append(new_disk)

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            utils.PathJoin(constants.EXPORT_DIR, src_path)

      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)

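  # Editor's note (illustrative, not part of the original source): ExpandNames
  # above expects self.op.nics as a list of dicts with optional "mode", "ip",
  # "mac" and "link"/"bridge" keys (e.g. {"mode": constants.NIC_MODE_ROUTED,
  # "ip": constants.VALUE_AUTO}), and self.op.disks as dicts with a mandatory
  # "size" plus optional "mode" and "adopt" keys
  # (e.g. {"size": 1024, "mode": constants.DISK_RDWR}).
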
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.result),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.result[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.result))
    if ial.required_nodes == 2:
      self.op.snode = ial.result[1]

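  # Editor's note (illustrative, not part of the original source): on success
  # the allocator result is a list of node names; _RunAllocator above uses
  # ial.result[0] as the primary node and, when two nodes are required (for a
  # mirrored template such as DRBD8), ial.result[1] as the secondary.
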
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
                                                         src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path, errors.ECODE_INVAL)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config",
                                     errors.ECODE_ENVIRON)

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION),
                                   errors.ECODE_ENVIRON)

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks),
                                   errors.ECODE_INVAL)

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = utils.PathJoin(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements, if not adopting
    if req_size is not None and not self.adopt_disks:
      _CheckNodesFreeDisk(self, nodenames, req_size)

    if self.adopt_disks: # instead, we must check the adoption data
      all_lvs = set([i["adopt"] for i in self.disks])
      if len(all_lvs) != len(self.disks):
        raise errors.OpPrereqError("Duplicate volume names given for adoption",
                                   errors.ECODE_INVAL)
      for lv_name in all_lvs:
        try:
          self.cfg.ReserveLV(lv_name, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("LV named %s used by another instance" %
                                     lv_name, errors.ECODE_NOTUNIQUE)

      node_lvs = self.rpc.call_lv_list([pnode.name],
                                       self.cfg.GetVGName())[pnode.name]
      node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
      node_lvs = node_lvs.payload
      delta = all_lvs.difference(node_lvs.keys())
      if delta:
        raise errors.OpPrereqError("Missing logical volume(s): %s" %
                                   utils.CommaJoin(delta),
                                   errors.ECODE_INVAL)
      online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
      if online_lvs:
        raise errors.OpPrereqError("Online logical volumes found, cannot"
                                   " adopt: %s" % utils.CommaJoin(online_lvs),
                                   errors.ECODE_STATE)
      # update the size of disk based on what is found
      for dsk in self.disks:
        dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                      string_file_storage_dir, instance)


    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip(tmp_disks, self.disks):
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adopted LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        # FIXME: pass debug option from opcode to backend
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name,
                                                         self.op.debug_level)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


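# Editor's note (illustrative, not part of the original source):
# LUCreateInstance.Exec above proceeds roughly as: allocate a network port if
# the hypervisor needs one, generate the Disk objects, create (or adopt and
# rename) the disks, add the instance to the configuration, release node
# locks, wait for the disks to sync, run the OS create/import scripts and
# finally start the instance if requested.
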
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl


class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if not hasattr(self.op, "early_release"):
      self.op.early_release = False

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True, self.op.early_release)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)


class TLReplaceDisks(Tasklet):
  """Replaces disks for an instance.

  Note: Locking is not within the scope of this class.

  """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None

  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Helper function for users of this class.

    """
    # check for valid parameter combination
    if mode == constants.REPLACE_DISK_CHG:
      if remote_node is None and iallocator is None:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given", errors.ECODE_INVAL)

      if remote_node is not None and iallocator is not None:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both", errors.ECODE_INVAL)

    elif remote_node is not None or iallocator is not None:
      # Not replacing the secondary
      raise errors.OpPrereqError("The iallocator and new node options can"
                                 " only be used when changing the"
                                 " secondary node", errors.ECODE_INVAL)

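  # Editor's note (illustrative, not part of the original source): valid
  # combinations for CheckArguments above are REPLACE_DISK_CHG with exactly
  # one of remote_node or iallocator set, or any other replacement mode with
  # both left as None.
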
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(lu.cfg, lu.rpc,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=instance_name,
                     relocate_from=relocate_from)

    ial.Run(iallocator_name)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, ial.info),
                                 errors.ECODE_NORES)

    if len(ial.result) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(ial.result), ial.required_nodes),
                                 errors.ECODE_FAULT)

    remote_node_name = ial.result[0]

    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, remote_node_name)

    return remote_node_name

  def _FindFaultyDisks(self, node_name):
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 len(instance.secondary_nodes),
                                 errors.ECODE_FAULT)

    if not self.delay_iallocator:
      self._CheckPrereq2()

  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip

  def Exec(self, feedback_fn):
    """Execute disk replacement.

    This dispatches the disk replacement to the appropriate handler.

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    activate_disks = (not self.instance.admin_up)

    # Activate the instance disks if we're replacing them on a down instance
    if activate_disks:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Should we replace the secondary node?
      if self.new_node is not None:
        fn = self._ExecDrbd8Secondary
      else:
        fn = self._ExecDrbd8DiskOnly

      return fn(feedback_fn)

    finally:
      # Deactivate the instance disks if we're replacing them on a
      # down instance
      if activate_disks:
        _SafeShutdownInstanceDisks(self.lu, self.instance)

  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))

  def _CheckDisksExistence(self, nodes):
    # Check disk existence
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      for node in nodes:
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
        self.cfg.SetDiskID(dev, node)

        result = self.rpc.call_blockdev_find(node, dev)

        msg = result.fail_msg
        if msg or not result.payload:
          if not msg:
            msg = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (idx, node, msg))

  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                      (idx, node_name))

      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
                                   ldisk=ldisk):
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                 " replace disks for instance %s" %
                                 (node_name, self.instance.name))

  def _CreateNewStorage(self, node_name):
    vgname = self.cfg.GetVGName()
    iv_names = {}

    for idx, dev in enumerate(self.instance.disks):
      if idx not in self.disks:
        continue

      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))

      self.cfg.SetDiskID(dev, node_name)

      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
      names = _GenerateUniqueNames(self.lu, lv_names)

      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
                             logical_id=(vgname, names[0]))
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                             logical_id=(vgname, names[1]))

      new_lvs = [lv_data, lv_meta]
      old_lvs = dev.children
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)

      # we pass force_create=True to force the LVM creation
      for new_lv in new_lvs:
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    return iv_names

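  # _CreateNewStorage returns a mapping of the form
  # {iv_name: (drbd_dev, old_lvs, new_lvs)}; _CheckDevices and
  # _RemoveOldStorage below iterate over the same mapping, so the three
  # helpers must agree on that tuple layout.
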
  def _CheckDevices(self, node_name, iv_names):
    for name, (dev, _, _) in iv_names.iteritems():
      self.cfg.SetDiskID(dev, node_name)

      result = self.rpc.call_blockdev_find(node_name, dev)

      msg = result.fail_msg
      if msg or not result.payload:
        if not msg:
          msg = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (name, msg))

      if result.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % name)

  def _RemoveOldStorage(self, node_name, iv_names):
    for name, (_, old_lvs, _) in iv_names.iteritems():
      self.lu.LogInfo("Remove logical volumes for %s" % name)

      for lv in old_lvs:
        self.cfg.SetDiskID(lv, node_name)

        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
        if msg:
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
                             hint="remove unused LVs manually")

  def _ReleaseNodeLock(self, node_name):
    """Releases the lock for a given node."""
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)

  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced.<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced.<time_t>)

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))
      #dev.children = []
      #cfg.Update(instance)

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())
      ren_fn = lambda d, suff: (d.physical_id[0],
                                d.physical_id[1] + "_replaced-%s" % suff)

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      for disk in old_lvs:
        disk.logical_id = ren_fn(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release both node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.target_node, self.other_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)

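  # Summary of the disk-only replacement above: for every selected disk the
  # new LVs are created first, the old LVs are detached from the DRBD device
  # and renamed to <name>_replaced-<timestamp>, the new LVs take over the old
  # names and are attached back as DRBD children.  The old LVs are removed
  # last (immediately if early_release is set, otherwise after the resync).
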
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setup changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the latter activation in step 4
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
                                               self.instance.disks)\
                                              [self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))
    cstep = 5
    if self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      cstep += 1
      self._RemoveOldStorage(self.target_node, iv_names)
      # WARNING: we release all node locks here, do not do other RPCs
      # than WaitForSync to the primary node
      self._ReleaseNodeLock([self.instance.primary_node,
                             self.target_node,
                             self.new_node])

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(cstep, steps_total, "Sync devices")
    cstep += 1
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    if not self.early_release:
      self.lu.LogStep(cstep, steps_total, "Removing old storage")
      self._RemoveOldStorage(self.target_node, iv_names)


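# The function below is an illustrative sketch only (it is not used by any
# LU and is not part of the Ganeti API): it replays, on plain strings, the
# rename order used by TLReplaceDisks._ExecDrbd8DiskOnly above.  The helper
# name and its toy arguments are hypothetical.
def _SketchLvSwapOrder(old_lv_names, new_lv_names, timestamp):
  """Return the rename steps needed to swap a set of LVs.

  The order mirrors the real code: the old LVs are first moved out of the
  way under a temporary "_replaced-<timestamp>" suffix, then the new LVs are
  renamed to the now-free original names.

  """
  steps = []
  # step 1: rename old LVs to <name>_replaced-<timestamp>
  for name in old_lv_names:
    steps.append(("rename", name, "%s_replaced-%s" % (name, timestamp)))
  # step 2: rename new LVs to the original names
  for old_name, new_name in zip(old_lv_names, new_lv_names):
    steps.append(("rename", new_name, old_name))
  return steps

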
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    """Ensure faulty disks abort the opcode or at least warn."""
    try:
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                  node_name, True):
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                   " node '%s'" % (instance.name, node_name),
                                   errors.ECODE_STATE)
    except errors.OpPrereqError, err:
      if self.op.ignore_consistency:
        self.proc.LogWarning(str(err.args[0]))
      else:
        raise

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type,
                                 errors.ECODE_INVAL)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      if not inst.admin_up:
        continue
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


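# Illustrative sketch only (not part of the Ganeti API): it restates the
# "check the other nodes of every instance" loop from
# LURepairNodeStorage.CheckPrereq above on plain data.  The helper name and
# the (instance_name, node_list) tuples it takes are hypothetical.
def _SketchOtherNodesToCheck(instances, repaired_node):
  """Map each instance to the nodes that must be checked for faulty disks.

  The node being repaired is excluded, exactly as CheckPrereq does with
  set.discard().

  """
  to_check = {}
  for (instance_name, node_list) in instances:
    nodes = set(node_list)
    nodes.discard(repaired_node)
    to_check[instance_name] = nodes
  return to_check

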
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None
    if self.op.remote_node is not None and self.op.iallocator is not None:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)

  def ExpandNames(self):
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def CheckPrereq(self):
    pass

  def Exec(self, feedback_fn):
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      result = ial.result
    return result


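# Illustrative sketch only (not part of the Ganeti API): it mirrors the
# fixed-secondary branch of LUNodeEvacuationStrategy.Exec above, pairing each
# evacuated instance with the proposed new secondary and rejecting instances
# whose primary node is that same node.  The helper name and the
# (instance_name, primary_node) tuples it takes are hypothetical.
def _SketchEvacuationPairs(secondary_instances, remote_node):
  """Return [[instance_name, remote_node], ...] or raise on a bad pairing."""
  result = []
  for (instance_name, primary_node) in secondary_instances:
    if primary_node == remote_node:
      raise ValueError("%s is the primary node of %s" %
                       (remote_node, instance_name))
    result.append([instance_name, remote_node])
  return result

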
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    nodenames = list(instance.all_nodes)
    for node in nodenames:
      _CheckNodeOnline(self, node)

    self.instance = instance

    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    _CheckNodesFreeDisk(self, nodenames, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    instance = self.instance
    disk = self.disk
    for node in instance.all_nodes:
      self.cfg.SetDiskID(disk, node)
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
      result.Raise("Grow request failed to node %s" % node)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)
    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, instance)
      if disk_abort:
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                             " status.\nPlease check the instance.")


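# Illustrative sketch only (not part of the Ganeti API): it restates the
# control flow of LUGrowDisk.Exec above - grow the disk on every node first,
# then record the growth, then optionally wait for the resync.  The callables
# it receives (grow_fn, record_fn, wait_fn) are hypothetical stand-ins for
# the RPC and configuration calls used by the real code.
def _SketchGrowDiskFlow(nodes, amount, grow_fn, record_fn, wait_fn=None):
  """Run the grow steps in the same order as LUGrowDisk.Exec."""
  for node in nodes:
    grow_fn(node, amount)
    # the real code sleeps a few seconds here to work around a DRBD < 8.0.13
    # bug with "resize" while in sync mode (see the comment in Exec above)
  record_fn(amount)
  if wait_fn is not None:
    return wait_fn()
  return True

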
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
    else:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]
    return

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    data = {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

    return data

  def Exec(self, feedback_fn):
    """Gather and return data"""
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      idict = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

      result[instance.name] = idict

    return result


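# Illustrative sketch only (not part of the Ganeti API): it mirrors the
# recursion of LUQueryInstanceData._ComputeDiskStatus above, which descends
# into dev.children and returns one nested dict per device.  The _ToyDisk
# class and the field subset used here are hypothetical.
class _ToyDisk(object):
  def __init__(self, iv_name, size, children=None):
    self.iv_name = iv_name
    self.size = size
    self.children = children or []


def _SketchDiskTree(disk):
  """Return a nested status dict for a toy disk tree."""
  return {
    "iv_name": disk.iv_name,
    "size": disk.size,
    "children": [_SketchDiskTree(child) for child in disk.children],
    }

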
class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    if not hasattr(self.op, "disk_template"):
      self.op.disk_template = None
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "os_name"):
      self.op.os_name = None
    if not hasattr(self.op, "force_variant"):
      self.op.force_variant = False
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or self.op.disk_template or
            self.op.hvparams or self.op.beparams or self.op.os_name):
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)

    if self.op.hvparams:
      _CheckGlobalHvParams(self.op.hvparams)

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
                                     errors.ECODE_INVAL)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing",
                                     errors.ECODE_INVAL)
        try:
          size = int(size)
        except (TypeError, ValueError), err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err), errors.ECODE_INVAL)
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk", errors.ECODE_INVAL)

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

    if self.op.disks and self.op.disk_template is not None:
      raise errors.OpPrereqError("Disk template conversion and other disk"
                                 " changes not supported at the same time",
                                 errors.ECODE_INVAL)

    if self.op.disk_template:
      _CheckDiskTemplate(self.op.disk_template)
      if (self.op.disk_template in constants.DTS_NET_MIRROR and
          self.op.remote_node is None):
        raise errors.OpPrereqError("Changing the disk template to a mirrored"
                                   " one requires specifying a secondary node",
                                   errors.ECODE_INVAL)

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
                                       errors.ECODE_INVAL)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)

        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic",
                                     errors.ECODE_INVAL)

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time", errors.ECODE_INVAL)

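  # The disks/nics modification lists validated above are lists of
  # (operation, parameters) pairs, where the operation is constants.DDM_ADD,
  # constants.DDM_REMOVE or an existing device index.  Illustrative values
  # (sizes and device names are made up) would be:
  #   op.disks = [(constants.DDM_ADD, {"size": 1024,
  #                                    "mode": constants.DISK_RDWR})]
  #   op.nics  = [(0, {"link": "br100"})]
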
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()
      if self.op.disk_template and self.op.remote_node:
        self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
        self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    if self.op.disk_template:
      env["NEW_DISK_TEMPLATE"] = self.op.disk_template
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)

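  # Worked example of _GetUpdatedParams (values are illustrative only):
  #   old_params     = {"memory": 512, "vcpus": 2}
  #   update_dict    = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
  #   default_values = {"memory": 128, "vcpus": 1}
  # gives:
  #   new_parameters    = {"vcpus": 4}          (the reset key is dropped)
  #   filled_parameters = {"memory": 128, "vcpus": 4}
  # i.e. VALUE_DEFAULT removes the per-instance override and the filled dict
  # falls back to the supplied defaults.
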
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    if self.op.disk_template:
      if instance.disk_template == self.op.disk_template:
        raise errors.OpPrereqError("Instance already has disk template %s" %
                                   instance.disk_template, errors.ECODE_INVAL)

      if (instance.disk_template,
          self.op.disk_template) not in self._DISK_CONVERSIONS:
        raise errors.OpPrereqError("Unsupported disk template conversion from"
                                   " %s to %s" % (instance.disk_template,
                                                  self.op.disk_template),
                                   errors.ECODE_INVAL)
      if self.op.disk_template in constants.DTS_NET_MIRROR:
        _CheckNodeOnline(self, self.op.remote_node)
        _CheckNodeNotDrained(self, self.op.remote_node)
        disks = [{"size": d.size} for d in instance.disks]
        required = _ComputeDiskSize(self.op.disk_template, disks)
        _CheckNodesFreeDisk(self, [self.op.remote_node], required)
        _CheckInstanceDown(self, instance, "cannot change disk template")

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance", errors.ECODE_INVAL)
        _CheckInstanceDown(self, instance, "cannot remove disks")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)),
                                     errors.ECODE_INVAL)

    # OS change
    if self.op.os_name and not self.op.force:
      _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
                      self.op.force_variant)

    return

  def _ConvertPlainToDrbd(self, feedback_fn):
8198
    """Converts an instance from plain to drbd.
8199

8200
    """
8201
    feedback_fn("Converting template to drbd")
8202
    instance = self.instance
8203
    pnode = instance.primary_node
8204
    snode = self.op.remote_node
8205

    
8206
    # create a fake disk info for _GenerateDiskTemplate
8207
    disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8208
    new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8209
                                      instance.name, pnode, [snode],
8210
                                      disk_info, None, None, 0)
8211
    info = _GetInstanceInfoText(instance)
8212
    feedback_fn("Creating aditional volumes...")
8213
    # first, create the missing data and meta devices
8214
    for disk in new_disks:
8215
      # unfortunately this is... not too nice
8216
      _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8217
                            info, True)
8218
      for child in disk.children:
8219
        _CreateSingleBlockDev(self, snode, instance, child, info, True)
8220
    # at this stage, all new LVs have been created, we can rename the
8221
    # old ones
8222
    feedback_fn("Renaming original volumes...")
8223
    rename_list = [(o, n.children[0].logical_id)
8224
                   for (o, n) in zip(instance.disks, new_disks)]
8225
    result = self.rpc.call_blockdev_rename(pnode, rename_list)
8226
    result.Raise("Failed to rename original LVs")
8227

    
8228
    feedback_fn("Initializing DRBD devices...")
8229
    # all child devices are in place, we can now create the DRBD devices
8230
    for disk in new_disks:
8231
      for node in [pnode, snode]:
8232
        f_create = node == pnode
8233
        _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8234

    
8235
    # at this point, the instance has been modified
8236
    instance.disk_template = constants.DT_DRBD8
8237
    instance.disks = new_disks
8238
    self.cfg.Update(instance, feedback_fn)
8239

    
8240
    # disks are created, waiting for sync
8241
    disk_abort = not _WaitForSync(self, instance)
8242
    if disk_abort:
8243
      raise errors.OpExecError("There are some degraded disks for"
8244
                               " this instance, please cleanup manually")
8245
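  # Editor's note: illustrative sketch, not part of this module.  The core of
  # the conversion above is the pairing of each existing LV with the data
  # child (children[0]) of its new DRBD device, which drives the rename call;
  # expressed over plain ids instead of objects.Disk (hypothetical names):
  #
  #   def sketch_rename_pairs(old_lv_ids, new_drbd_disks):
  #     # each new DRBD disk carries (data_lv, meta_lv) as its children
  #     return [(old_id, new.children[0].logical_id)
  #             for old_id, new in zip(old_lv_ids, new_drbd_disks)]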

    
8246
  def _ConvertDrbdToPlain(self, feedback_fn):
8247
    """Converts an instance from drbd to plain.
8248

8249
    """
8250
    instance = self.instance
8251
    assert len(instance.secondary_nodes) == 1
8252
    pnode = instance.primary_node
8253
    snode = instance.secondary_nodes[0]
8254
    feedback_fn("Converting template to plain")
8255

    
8256
    old_disks = instance.disks
8257
    new_disks = [d.children[0] for d in old_disks]
8258

    
8259
    # copy over size and mode
8260
    for parent, child in zip(old_disks, new_disks):
8261
      child.size = parent.size
8262
      child.mode = parent.mode
8263

    
8264
    # update instance structure
8265
    instance.disks = new_disks
8266
    instance.disk_template = constants.DT_PLAIN
8267
    self.cfg.Update(instance, feedback_fn)
8268

    
8269
    feedback_fn("Removing volumes on the secondary node...")
8270
    for disk in old_disks:
8271
      self.cfg.SetDiskID(disk, snode)
8272
      msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8273
      if msg:
8274
        self.LogWarning("Could not remove block device %s on node %s,"
8275
                        " continuing anyway: %s", disk.iv_name, snode, msg)
8276

    
8277
    feedback_fn("Removing unneeded volumes on the primary node...")
8278
    for idx, disk in enumerate(old_disks):
8279
      meta = disk.children[1]
8280
      self.cfg.SetDiskID(meta, pnode)
8281
      msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8282
      if msg:
8283
        self.LogWarning("Could not remove metadata for disk %d on node %s,"
8284
                        " continuing anyway: %s", idx, pnode, msg)
8285

    
8286

    
8287
  def Exec(self, feedback_fn):
8288
    """Modifies an instance.
8289

8290
    All parameters take effect only at the next restart of the instance.
8291

8292
    """
8293
    # Process here the warnings from CheckPrereq, as we don't have a
8294
    # feedback_fn there.
8295
    for warn in self.warn:
8296
      feedback_fn("WARNING: %s" % warn)
8297

    
8298
    result = []
8299
    instance = self.instance
8300
    # disk changes
8301
    for disk_op, disk_dict in self.op.disks:
8302
      if disk_op == constants.DDM_REMOVE:
8303
        # remove the last disk
8304
        device = instance.disks.pop()
8305
        device_idx = len(instance.disks)
8306
        for node, disk in device.ComputeNodeTree(instance.primary_node):
8307
          self.cfg.SetDiskID(disk, node)
8308
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8309
          if msg:
8310
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
8311
                            " continuing anyway", device_idx, node, msg)
8312
        result.append(("disk/%d" % device_idx, "remove"))
8313
      elif disk_op == constants.DDM_ADD:
8314
        # add a new disk
8315
        if instance.disk_template == constants.DT_FILE:
8316
          file_driver, file_path = instance.disks[0].logical_id
8317
          file_path = os.path.dirname(file_path)
8318
        else:
8319
          file_driver = file_path = None
8320
        disk_idx_base = len(instance.disks)
8321
        new_disk = _GenerateDiskTemplate(self,
8322
                                         instance.disk_template,
8323
                                         instance.name, instance.primary_node,
8324
                                         instance.secondary_nodes,
8325
                                         [disk_dict],
8326
                                         file_path,
8327
                                         file_driver,
8328
                                         disk_idx_base)[0]
8329
        instance.disks.append(new_disk)
8330
        info = _GetInstanceInfoText(instance)
8331

    
8332
        logging.info("Creating volume %s for instance %s",
8333
                     new_disk.iv_name, instance.name)
8334
        # Note: this needs to be kept in sync with _CreateDisks
8335
        #HARDCODE
8336
        for node in instance.all_nodes:
8337
          f_create = node == instance.primary_node
8338
          try:
8339
            _CreateBlockDev(self, node, instance, new_disk,
8340
                            f_create, info, f_create)
8341
          except errors.OpExecError, err:
8342
            self.LogWarning("Failed to create volume %s (%s) on"
8343
                            " node %s: %s",
8344
                            new_disk.iv_name, new_disk, node, err)
8345
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8346
                       (new_disk.size, new_disk.mode)))
8347
      else:
8348
        # change a given disk
8349
        instance.disks[disk_op].mode = disk_dict['mode']
8350
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8351

    
8352
    if self.op.disk_template:
8353
      r_shut = _ShutdownInstanceDisks(self, instance)
8354
      if not r_shut:
8355
        raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8356
                                 " proceed with disk template conversion")
8357
      mode = (instance.disk_template, self.op.disk_template)
8358
      try:
8359
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
8360
      except:
8361
        self.cfg.ReleaseDRBDMinors(instance.name)
8362
        raise
8363
      result.append(("disk_template", self.op.disk_template))
8364

    
8365
    # NIC changes
8366
    for nic_op, nic_dict in self.op.nics:
8367
      if nic_op == constants.DDM_REMOVE:
8368
        # remove the last nic
8369
        del instance.nics[-1]
8370
        result.append(("nic.%d" % len(instance.nics), "remove"))
8371
      elif nic_op == constants.DDM_ADD:
8372
        # mac and bridge should be set, by now
8373
        mac = nic_dict['mac']
8374
        ip = nic_dict.get('ip', None)
8375
        nicparams = self.nic_pinst[constants.DDM_ADD]
8376
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8377
        instance.nics.append(new_nic)
8378
        result.append(("nic.%d" % (len(instance.nics) - 1),
8379
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
8380
                       (new_nic.mac, new_nic.ip,
8381
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8382
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8383
                       )))
8384
      else:
8385
        for key in 'mac', 'ip':
8386
          if key in nic_dict:
8387
            setattr(instance.nics[nic_op], key, nic_dict[key])
8388
        if nic_op in self.nic_pinst:
8389
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8390
        for key, val in nic_dict.iteritems():
8391
          result.append(("nic.%s/%d" % (key, nic_op), val))
8392

    
8393
    # hvparams changes
8394
    if self.op.hvparams:
8395
      instance.hvparams = self.hv_inst
8396
      for key, val in self.op.hvparams.iteritems():
8397
        result.append(("hv/%s" % key, val))
8398

    
8399
    # beparams changes
8400
    if self.op.beparams:
8401
      instance.beparams = self.be_inst
8402
      for key, val in self.op.beparams.iteritems():
8403
        result.append(("be/%s" % key, val))
8404

    
8405
    # OS change
8406
    if self.op.os_name:
8407
      instance.os = self.op.os_name
8408

    
8409
    self.cfg.Update(instance, feedback_fn)
8410

    
8411
    return result
8412

    
8413
  _DISK_CONVERSIONS = {
8414
    (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8415
    (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8416
    }
8417
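# Editor's note: the sketch below is illustrative only and not part of
# cmdlib.py.  _DISK_CONVERSIONS above is a plain dispatch table keyed by
# (current_template, requested_template); Exec looks the pair up and calls
# the matching converter.  The same pattern, stripped of LU context
# (hypothetical names):
def _sketch_dispatch_conversion(current, requested, conversions):
  """Call the converter registered for (current, requested), if any."""
  fn = conversions.get((current, requested))
  if fn is None:
    raise ValueError("conversion from %s to %s is not supported"
                     % (current, requested))
  return fn()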

    
8418
class LUQueryExports(NoHooksLU):
8419
  """Query the exports list
8420

8421
  """
8422
  _OP_REQP = ['nodes']
8423
  REQ_BGL = False
8424

    
8425
  def ExpandNames(self):
8426
    self.needed_locks = {}
8427
    self.share_locks[locking.LEVEL_NODE] = 1
8428
    if not self.op.nodes:
8429
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8430
    else:
8431
      self.needed_locks[locking.LEVEL_NODE] = \
8432
        _GetWantedNodes(self, self.op.nodes)
8433

    
8434
  def CheckPrereq(self):
8435
    """Check prerequisites.
8436

8437
    """
8438
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
8439

    
8440
  def Exec(self, feedback_fn):
8441
    """Compute the list of all the exported system images.
8442

8443
    @rtype: dict
8444
    @return: a dictionary with the structure node->(export-list)
8445
        where export-list is a list of the instances exported on
8446
        that node.
8447

8448
    """
8449
    rpcresult = self.rpc.call_export_list(self.nodes)
8450
    result = {}
8451
    for node in rpcresult:
8452
      if rpcresult[node].fail_msg:
8453
        result[node] = False
8454
      else:
8455
        result[node] = rpcresult[node].payload
8456

    
8457
    return result
8458
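# Editor's note: illustrative sketch, not part of cmdlib.py.  The Exec above
# returns a dict mapping node name to either a list of export names or False
# when the node could not be queried; a caller might invert it per instance
# like this (hypothetical helper):
def _sketch_exports_by_instance(per_node):
  """Turn {node: [export, ...] | False} into {export: [node, ...]}."""
  exports = {}
  for node, payload in per_node.items():
    if payload is False:
      continue  # the RPC to this node failed
    for name in payload:
      exports.setdefault(name, []).append(node)
  return exports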

    
8459

    
8460
class LUExportInstance(LogicalUnit):
8461
  """Export an instance to an image in the cluster.
8462

8463
  """
8464
  HPATH = "instance-export"
8465
  HTYPE = constants.HTYPE_INSTANCE
8466
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
8467
  REQ_BGL = False
8468

    
8469
  def CheckArguments(self):
8470
    """Check the arguments.
8471

8472
    """
8473
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
8474
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)
8475

    
8476
  def ExpandNames(self):
8477
    self._ExpandAndLockInstance()
8478
    # FIXME: lock only instance primary and destination node
8479
    #
8480
    # Sad but true, for now we have to lock all nodes, as we don't know where
8481
    # the previous export might be, and in this LU we search for it and
8482
    # remove it from its current node. In the future we could fix this by:
8483
    #  - making a tasklet to search (share-lock all), then create the new one,
8484
    #    then one to remove, after
8485
    #  - removing the removal operation altogether
8486
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8487

    
8488
  def DeclareLocks(self, level):
8489
    """Last minute lock declaration."""
8490
    # All nodes are locked anyway, so nothing to do here.
8491

    
8492
  def BuildHooksEnv(self):
8493
    """Build hooks env.
8494

8495
    This will run on the master, primary node and target node.
8496

8497
    """
8498
    env = {
8499
      "EXPORT_NODE": self.op.target_node,
8500
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
8501
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
8502
      }
8503
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8504
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
8505
          self.op.target_node]
8506
    return env, nl, nl
8507

    
8508
  def CheckPrereq(self):
8509
    """Check prerequisites.
8510

8511
    This checks that the instance and node names are valid.
8512

8513
    """
8514
    instance_name = self.op.instance_name
8515
    self.instance = self.cfg.GetInstanceInfo(instance_name)
8516
    assert self.instance is not None, \
8517
          "Cannot retrieve locked instance %s" % self.op.instance_name
8518
    _CheckNodeOnline(self, self.instance.primary_node)
8519

    
8520
    self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
8521
    self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
8522
    assert self.dst_node is not None
8523

    
8524
    _CheckNodeOnline(self, self.dst_node.name)
8525
    _CheckNodeNotDrained(self, self.dst_node.name)
8526

    
8527
    # instance disk type verification
8528
    for disk in self.instance.disks:
8529
      if disk.dev_type == constants.LD_FILE:
8530
        raise errors.OpPrereqError("Export not supported for instances with"
8531
                                   " file-based disks", errors.ECODE_INVAL)
8532

    
8533
  def Exec(self, feedback_fn):
8534
    """Export an instance to an image in the cluster.
8535

8536
    """
8537
    instance = self.instance
8538
    dst_node = self.dst_node
8539
    src_node = instance.primary_node
8540

    
8541
    if self.op.shutdown:
8542
      # shutdown the instance, but not the disks
8543
      feedback_fn("Shutting down instance %s" % instance.name)
8544
      result = self.rpc.call_instance_shutdown(src_node, instance,
8545
                                               self.shutdown_timeout)
8546
      result.Raise("Could not shutdown instance %s on"
8547
                   " node %s" % (instance.name, src_node))
8548

    
8549
    vgname = self.cfg.GetVGName()
8550

    
8551
    snap_disks = []
8552

    
8553
    # set the disks ID correctly since call_instance_start needs the
8554
    # correct drbd minor to create the symlinks
8555
    for disk in instance.disks:
8556
      self.cfg.SetDiskID(disk, src_node)
8557

    
8558
    activate_disks = (not instance.admin_up)
8559

    
8560
    if activate_disks:
8561
      # Activate the instance disks if we're exporting a stopped instance
8562
      feedback_fn("Activating disks for %s" % instance.name)
8563
      _StartInstanceDisks(self, instance, None)
8564

    
8565
    try:
8566
      # per-disk results
8567
      dresults = []
8568
      try:
8569
        for idx, disk in enumerate(instance.disks):
8570
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
8571
                      (idx, src_node))
8572

    
8573
          # result.payload will be a snapshot of an lvm leaf of the one we
8574
          # passed
8575
          result = self.rpc.call_blockdev_snapshot(src_node, disk)
8576
          msg = result.fail_msg
8577
          if msg:
8578
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
8579
                            idx, src_node, msg)
8580
            snap_disks.append(False)
8581
          else:
8582
            disk_id = (vgname, result.payload)
8583
            new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
8584
                                   logical_id=disk_id, physical_id=disk_id,
8585
                                   iv_name=disk.iv_name)
8586
            snap_disks.append(new_dev)
8587

    
8588
      finally:
8589
        if self.op.shutdown and instance.admin_up:
8590
          feedback_fn("Starting instance %s" % instance.name)
8591
          result = self.rpc.call_instance_start(src_node, instance, None, None)
8592
          msg = result.fail_msg
8593
          if msg:
8594
            _ShutdownInstanceDisks(self, instance)
8595
            raise errors.OpExecError("Could not start instance: %s" % msg)
8596

    
8597
      # TODO: check for size
8598

    
8599
      cluster_name = self.cfg.GetClusterName()
8600
      for idx, dev in enumerate(snap_disks):
8601
        feedback_fn("Exporting snapshot %s from %s to %s" %
8602
                    (idx, src_node, dst_node.name))
8603
        if dev:
8604
          # FIXME: pass debug from opcode to backend
8605
          result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
8606
                                                 instance, cluster_name,
8607
                                                 idx, self.op.debug_level)
8608
          msg = result.fail_msg
8609
          if msg:
8610
            self.LogWarning("Could not export disk/%s from node %s to"
8611
                            " node %s: %s", idx, src_node, dst_node.name, msg)
8612
            dresults.append(False)
8613
          else:
8614
            dresults.append(True)
8615
          msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
8616
          if msg:
8617
            self.LogWarning("Could not remove snapshot for disk/%d from node"
8618
                            " %s: %s", idx, src_node, msg)
8619
        else:
8620
          dresults.append(False)
8621

    
8622
      feedback_fn("Finalizing export on %s" % dst_node.name)
8623
      result = self.rpc.call_finalize_export(dst_node.name, instance,
8624
                                             snap_disks)
8625
      fin_resu = True
8626
      msg = result.fail_msg
8627
      if msg:
8628
        self.LogWarning("Could not finalize export for instance %s"
8629
                        " on node %s: %s", instance.name, dst_node.name, msg)
8630
        fin_resu = False
8631

    
8632
    finally:
8633
      if activate_disks:
8634
        feedback_fn("Deactivating disks for %s" % instance.name)
8635
        _ShutdownInstanceDisks(self, instance)
8636

    
8637
    nodelist = self.cfg.GetNodeList()
8638
    nodelist.remove(dst_node.name)
8639

    
8640
    # on one-node clusters nodelist will be empty after the removal
8641
    # if we proceed, the backup would be removed because OpQueryExports
8642
    # substitutes an empty list with the full cluster node list.
8643
    iname = instance.name
8644
    if nodelist:
8645
      feedback_fn("Removing old exports for instance %s" % iname)
8646
      exportlist = self.rpc.call_export_list(nodelist)
8647
      for node in exportlist:
8648
        if exportlist[node].fail_msg:
8649
          continue
8650
        if iname in exportlist[node].payload:
8651
          msg = self.rpc.call_export_remove(node, iname).fail_msg
8652
          if msg:
8653
            self.LogWarning("Could not remove older export for instance %s"
8654
                            " on node %s: %s", iname, node, msg)
8655
    return fin_resu, dresults
8656
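# Editor's note: illustrative sketch, not part of cmdlib.py.  The Exec above
# returns a pair (fin_resu, dresults): whether finalize_export succeeded and
# one boolean per disk snapshot/export.  A caller could reduce that to a
# single verdict (hypothetical helper):
def _sketch_export_fully_ok(fin_resu, dresults):
  """True only if the export was finalized and every disk was exported."""
  return bool(fin_resu) and all(dresults)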

    
8657

    
8658
class LURemoveExport(NoHooksLU):
8659
  """Remove exports related to the named instance.
8660

8661
  """
8662
  _OP_REQP = ["instance_name"]
8663
  REQ_BGL = False
8664

    
8665
  def ExpandNames(self):
8666
    self.needed_locks = {}
8667
    # We need all nodes to be locked in order for RemoveExport to work, but we
8668
    # don't need to lock the instance itself, as nothing will happen to it (and
8669
    # we can remove exports also for a removed instance)
8670
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8671

    
8672
  def CheckPrereq(self):
8673
    """Check prerequisites.
8674
    """
8675
    pass
8676

    
8677
  def Exec(self, feedback_fn):
8678
    """Remove any export.
8679

8680
    """
8681
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
8682
    # If the instance was not found we'll try with the name that was passed in.
8683
    # This will only work if it was an FQDN, though.
8684
    fqdn_warn = False
8685
    if not instance_name:
8686
      fqdn_warn = True
8687
      instance_name = self.op.instance_name
8688

    
8689
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
8690
    exportlist = self.rpc.call_export_list(locked_nodes)
8691
    found = False
8692
    for node in exportlist:
8693
      msg = exportlist[node].fail_msg
8694
      if msg:
8695
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
8696
        continue
8697
      if instance_name in exportlist[node].payload:
8698
        found = True
8699
        result = self.rpc.call_export_remove(node, instance_name)
8700
        msg = result.fail_msg
8701
        if msg:
8702
          logging.error("Could not remove export for instance %s"
8703
                        " on node %s: %s", instance_name, node, msg)
8704

    
8705
    if fqdn_warn and not found:
8706
      feedback_fn("Export not found. If trying to remove an export belonging"
8707
                  " to a deleted instance please use its Fully Qualified"
8708
                  " Domain Name.")
8709

    
8710

    
8711
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
8712
  """Generic tags LU.
8713

8714
  This is an abstract class which is the parent of all the other tags LUs.
8715

8716
  """
8717

    
8718
  def ExpandNames(self):
8719
    self.needed_locks = {}
8720
    if self.op.kind == constants.TAG_NODE:
8721
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
8722
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
8723
    elif self.op.kind == constants.TAG_INSTANCE:
8724
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
8725
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
8726

    
8727
  def CheckPrereq(self):
8728
    """Check prerequisites.
8729

8730
    """
8731
    if self.op.kind == constants.TAG_CLUSTER:
8732
      self.target = self.cfg.GetClusterInfo()
8733
    elif self.op.kind == constants.TAG_NODE:
8734
      self.target = self.cfg.GetNodeInfo(self.op.name)
8735
    elif self.op.kind == constants.TAG_INSTANCE:
8736
      self.target = self.cfg.GetInstanceInfo(self.op.name)
8737
    else:
8738
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
8739
                                 str(self.op.kind), errors.ECODE_INVAL)
8740
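# Editor's note: illustrative sketch, not part of cmdlib.py.  CheckPrereq
# above resolves the tag target from the (kind, name) pair in the opcode; the
# same dispatch written as a lookup table over plain dicts (hypothetical
# names, the real kinds are constants.TAG_CLUSTER/TAG_NODE/TAG_INSTANCE):
def _sketch_resolve_tag_target(kind, name, cluster, nodes, instances):
  resolvers = {
    "cluster": lambda: cluster,
    "node": lambda: nodes.get(name),
    "instance": lambda: instances.get(name),
    }
  if kind not in resolvers:
    raise ValueError("Wrong tag type requested (%s)" % kind)
  return resolvers[kind]()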

    
8741

    
8742
class LUGetTags(TagsLU):
8743
  """Returns the tags of a given object.
8744

8745
  """
8746
  _OP_REQP = ["kind", "name"]
8747
  REQ_BGL = False
8748

    
8749
  def Exec(self, feedback_fn):
8750
    """Returns the tag list.
8751

8752
    """
8753
    return list(self.target.GetTags())
8754

    
8755

    
8756
class LUSearchTags(NoHooksLU):
8757
  """Searches the tags for a given pattern.
8758

8759
  """
8760
  _OP_REQP = ["pattern"]
8761
  REQ_BGL = False
8762

    
8763
  def ExpandNames(self):
8764
    self.needed_locks = {}
8765

    
8766
  def CheckPrereq(self):
8767
    """Check prerequisites.
8768

8769
    This checks the pattern passed for validity by compiling it.
8770

8771
    """
8772
    try:
8773
      self.re = re.compile(self.op.pattern)
8774
    except re.error, err:
8775
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8776
                                 (self.op.pattern, err), errors.ECODE_INVAL)
8777

    
8778
  def Exec(self, feedback_fn):
8779
    """Returns the tag list.
8780

8781
    """
8782
    cfg = self.cfg
8783
    tgts = [("/cluster", cfg.GetClusterInfo())]
8784
    ilist = cfg.GetAllInstancesInfo().values()
8785
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8786
    nlist = cfg.GetAllNodesInfo().values()
8787
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8788
    results = []
8789
    for path, target in tgts:
8790
      for tag in target.GetTags():
8791
        if self.re.search(tag):
8792
          results.append((path, tag))
8793
    return results
8794
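# Editor's note: illustrative sketch, not part of cmdlib.py.  The Exec above
# walks the cluster object, every instance and every node, and collects
# (path, tag) pairs whose tag matches the compiled pattern.  The same loop
# over plain data (hypothetical helper):
def _sketch_search_tags(pattern, tagged_objects):
  """tagged_objects: iterable of (path, tags) pairs; returns [(path, tag)]."""
  import re  # local import so the sketch stands alone
  rx = re.compile(pattern)
  return [(path, tag)
          for path, tags in tagged_objects
          for tag in tags
          if rx.search(tag)]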

    
8795

    
8796
class LUAddTags(TagsLU):
8797
  """Sets a tag on a given object.
8798

8799
  """
8800
  _OP_REQP = ["kind", "name", "tags"]
8801
  REQ_BGL = False
8802

    
8803
  def CheckPrereq(self):
8804
    """Check prerequisites.
8805

8806
    This checks the type and length of the tag name and value.
8807

8808
    """
8809
    TagsLU.CheckPrereq(self)
8810
    for tag in self.op.tags:
8811
      objects.TaggableObject.ValidateTag(tag)
8812

    
8813
  def Exec(self, feedback_fn):
8814
    """Sets the tag.
8815

8816
    """
8817
    try:
8818
      for tag in self.op.tags:
8819
        self.target.AddTag(tag)
8820
    except errors.TagError, err:
8821
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
8822
    self.cfg.Update(self.target, feedback_fn)
8823

    
8824

    
8825
class LUDelTags(TagsLU):
8826
  """Delete a list of tags from a given object.
8827

8828
  """
8829
  _OP_REQP = ["kind", "name", "tags"]
8830
  REQ_BGL = False
8831

    
8832
  def CheckPrereq(self):
8833
    """Check prerequisites.
8834

8835
    This checks that we have the given tag.
8836

8837
    """
8838
    TagsLU.CheckPrereq(self)
8839
    for tag in self.op.tags:
8840
      objects.TaggableObject.ValidateTag(tag)
8841
    del_tags = frozenset(self.op.tags)
8842
    cur_tags = self.target.GetTags()
8843
    if not del_tags <= cur_tags:
8844
      diff_tags = del_tags - cur_tags
8845
      diff_names = ["'%s'" % tag for tag in diff_tags]
8846
      diff_names.sort()
8847
      raise errors.OpPrereqError("Tag(s) %s not found" %
8848
                                 (",".join(diff_names)), errors.ECODE_NOENT)
8849

    
8850
  def Exec(self, feedback_fn):
8851
    """Remove the tag from the object.
8852

8853
    """
8854
    for tag in self.op.tags:
8855
      self.target.RemoveTag(tag)
8856
    self.cfg.Update(self.target, feedback_fn)
8857
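# Editor's note: illustrative sketch, not part of cmdlib.py.  The heart of
# the CheckPrereq above is a frozenset comparison: every tag requested for
# deletion must already be present on the target (hypothetical helper):
def _sketch_missing_tags(tags_to_delete, current_tags):
  """Return the sorted list of requested tags that are not currently set."""
  return sorted(frozenset(tags_to_delete) - frozenset(current_tags))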

    
8858

    
8859
class LUTestDelay(NoHooksLU):
8860
  """Sleep for a specified amount of time.
8861

8862
  This LU sleeps on the master and/or nodes for a specified amount of
8863
  time.
8864

8865
  """
8866
  _OP_REQP = ["duration", "on_master", "on_nodes"]
8867
  REQ_BGL = False
8868

    
8869
  def ExpandNames(self):
8870
    """Expand names and set required locks.
8871

8872
    This expands the node list, if any.
8873

8874
    """
8875
    self.needed_locks = {}
8876
    if self.op.on_nodes:
8877
      # _GetWantedNodes can be used here, but is not always appropriate to use
8878
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8879
      # more information.
8880
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8881
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8882

    
8883
  def CheckPrereq(self):
8884
    """Check prerequisites.
8885

8886
    """
8887

    
8888
  def Exec(self, feedback_fn):
8889
    """Do the actual sleep.
8890

8891
    """
8892
    if self.op.on_master:
8893
      if not utils.TestDelay(self.op.duration):
8894
        raise errors.OpExecError("Error during master delay test")
8895
    if self.op.on_nodes:
8896
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8897
      for node, node_result in result.items():
8898
        node_result.Raise("Failure during rpc call to node %s" % node)
8899

    
8900

    
8901
class IAllocator(object):
8902
  """IAllocator framework.
8903

8904
  An IAllocator instance has four sets of attributes:
8905
    - cfg that is needed to query the cluster
8906
    - input data (all members of the _KEYS class attribute are required)
8907
    - four buffer attributes (in|out_data|text), that represent the
8908
      input (to the external script) in text and data structure format,
8909
      and the output from it, again in two formats
8910
    - the result variables from the script (success, info, result) for
8911
      easy usage
8912

8913
  """
8914
  # pylint: disable-msg=R0902
8915
  # lots of instance attributes
8916
  _ALLO_KEYS = [
8917
    "name", "mem_size", "disks", "disk_template",
8918
    "os", "tags", "nics", "vcpus", "hypervisor",
8919
    ]
8920
  _RELO_KEYS = [
8921
    "name", "relocate_from",
8922
    ]
8923
  _EVAC_KEYS = [
8924
    "evac_nodes",
8925
    ]
8926

    
8927
  def __init__(self, cfg, rpc, mode, **kwargs):
8928
    self.cfg = cfg
8929
    self.rpc = rpc
8930
    # init buffer variables
8931
    self.in_text = self.out_text = self.in_data = self.out_data = None
8932
    # init all input fields so that pylint is happy
8933
    self.mode = mode
8934
    self.mem_size = self.disks = self.disk_template = None
8935
    self.os = self.tags = self.nics = self.vcpus = None
8936
    self.hypervisor = None
8937
    self.relocate_from = None
8938
    self.name = None
8939
    self.evac_nodes = None
8940
    # computed fields
8941
    self.required_nodes = None
8942
    # init result fields
8943
    self.success = self.info = self.result = None
8944
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8945
      keyset = self._ALLO_KEYS
8946
      fn = self._AddNewInstance
8947
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8948
      keyset = self._RELO_KEYS
8949
      fn = self._AddRelocateInstance
8950
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8951
      keyset = self._EVAC_KEYS
8952
      fn = self._AddEvacuateNodes
8953
    else:
8954
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8955
                                   " IAllocator" % self.mode)
8956
    for key in kwargs:
8957
      if key not in keyset:
8958
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
8959
                                     " IAllocator" % key)
8960
      setattr(self, key, kwargs[key])
8961

    
8962
    for key in keyset:
8963
      if key not in kwargs:
8964
        raise errors.ProgrammerError("Missing input parameter '%s' to"
8965
                                     " IAllocator" % key)
8966
    self._BuildInputData(fn)
8967

    
8968
  def _ComputeClusterData(self):
8969
    """Compute the generic allocator input data.
8970

8971
    This is the data that is independent of the actual operation.
8972

8973
    """
8974
    cfg = self.cfg
8975
    cluster_info = cfg.GetClusterInfo()
8976
    # cluster data
8977
    data = {
8978
      "version": constants.IALLOCATOR_VERSION,
8979
      "cluster_name": cfg.GetClusterName(),
8980
      "cluster_tags": list(cluster_info.GetTags()),
8981
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8982
      # we don't have job IDs
8983
      }
8984
    iinfo = cfg.GetAllInstancesInfo().values()
8985
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8986

    
8987
    # node data
8988
    node_results = {}
8989
    node_list = cfg.GetNodeList()
8990

    
8991
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8992
      hypervisor_name = self.hypervisor
8993
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8994
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8995
    elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
8996
      hypervisor_name = cluster_info.enabled_hypervisors[0]
8997

    
8998
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8999
                                        hypervisor_name)
9000
    node_iinfo = \
9001
      self.rpc.call_all_instances_info(node_list,
9002
                                       cluster_info.enabled_hypervisors)
9003
    for nname, nresult in node_data.items():
9004
      # first fill in static (config-based) values
9005
      ninfo = cfg.GetNodeInfo(nname)
9006
      pnr = {
9007
        "tags": list(ninfo.GetTags()),
9008
        "primary_ip": ninfo.primary_ip,
9009
        "secondary_ip": ninfo.secondary_ip,
9010
        "offline": ninfo.offline,
9011
        "drained": ninfo.drained,
9012
        "master_candidate": ninfo.master_candidate,
9013
        }
9014

    
9015
      if not (ninfo.offline or ninfo.drained):
9016
        nresult.Raise("Can't get data for node %s" % nname)
9017
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9018
                                nname)
9019
        remote_info = nresult.payload
9020

    
9021
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
9022
                     'vg_size', 'vg_free', 'cpu_total']:
9023
          if attr not in remote_info:
9024
            raise errors.OpExecError("Node '%s' didn't return attribute"
9025
                                     " '%s'" % (nname, attr))
9026
          if not isinstance(remote_info[attr], int):
9027
            raise errors.OpExecError("Node '%s' returned invalid value"
9028
                                     " for '%s': %s" %
9029
                                     (nname, attr, remote_info[attr]))
9030
        # compute memory used by primary instances
9031
        i_p_mem = i_p_up_mem = 0
9032
        for iinfo, beinfo in i_list:
9033
          if iinfo.primary_node == nname:
9034
            i_p_mem += beinfo[constants.BE_MEMORY]
9035
            if iinfo.name not in node_iinfo[nname].payload:
9036
              i_used_mem = 0
9037
            else:
9038
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9039
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9040
            remote_info['memory_free'] -= max(0, i_mem_diff)
9041

    
9042
            if iinfo.admin_up:
9043
              i_p_up_mem += beinfo[constants.BE_MEMORY]
9044

    
9045
        # compute memory used by instances
9046
        pnr_dyn = {
9047
          "total_memory": remote_info['memory_total'],
9048
          "reserved_memory": remote_info['memory_dom0'],
9049
          "free_memory": remote_info['memory_free'],
9050
          "total_disk": remote_info['vg_size'],
9051
          "free_disk": remote_info['vg_free'],
9052
          "total_cpus": remote_info['cpu_total'],
9053
          "i_pri_memory": i_p_mem,
9054
          "i_pri_up_memory": i_p_up_mem,
9055
          }
9056
        pnr.update(pnr_dyn)
9057

    
9058
      node_results[nname] = pnr
9059
    data["nodes"] = node_results
9060

    
9061
    # instance data
9062
    instance_data = {}
9063
    for iinfo, beinfo in i_list:
9064
      nic_data = []
9065
      for nic in iinfo.nics:
9066
        filled_params = objects.FillDict(
9067
            cluster_info.nicparams[constants.PP_DEFAULT],
9068
            nic.nicparams)
9069
        nic_dict = {"mac": nic.mac,
9070
                    "ip": nic.ip,
9071
                    "mode": filled_params[constants.NIC_MODE],
9072
                    "link": filled_params[constants.NIC_LINK],
9073
                   }
9074
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9075
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9076
        nic_data.append(nic_dict)
9077
      pir = {
9078
        "tags": list(iinfo.GetTags()),
9079
        "admin_up": iinfo.admin_up,
9080
        "vcpus": beinfo[constants.BE_VCPUS],
9081
        "memory": beinfo[constants.BE_MEMORY],
9082
        "os": iinfo.os,
9083
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9084
        "nics": nic_data,
9085
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9086
        "disk_template": iinfo.disk_template,
9087
        "hypervisor": iinfo.hypervisor,
9088
        }
9089
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9090
                                                 pir["disks"])
9091
      instance_data[iinfo.name] = pir
9092

    
9093
    data["instances"] = instance_data
9094

    
9095
    self.in_data = data
9096
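  # Editor's note: illustrative sketch, not part of this module.  The data
  # assembled above (and completed with a "request" section by
  # _BuildInputData) is serialized and fed to the external allocator script.
  # A trimmed example of its shape, with made-up values:
  #
  #   {
  #     "version": ...,                  # constants.IALLOCATOR_VERSION
  #     "cluster_name": "cluster.example.com",
  #     "cluster_tags": [],
  #     "enabled_hypervisors": ["xen-pvm"],
  #     "nodes": {
  #       "node1.example.com": {
  #         "tags": [], "primary_ip": "192.0.2.1", "offline": False,
  #         "drained": False, "master_candidate": True,
  #         "total_memory": 4096, "reserved_memory": 512, "free_memory": 2048,
  #         "total_disk": 102400, "free_disk": 51200, "total_cpus": 4,
  #         "i_pri_memory": 1024, "i_pri_up_memory": 1024,
  #       },
  #     },
  #     "instances": {...},              # per-instance dicts as built above
  #     "request": {...},                # added later by _BuildInputData
  #   }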

    
9097
  def _AddNewInstance(self):
9098
    """Add new instance data to allocator structure.
9099

9100
    This in combination with _ComputeClusterData will create the
9101
    correct structure needed as input for the allocator.
9102

9103
    The checks for the completeness of the opcode must have already been
9104
    done.
9105

9106
    """
9107
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9108

    
9109
    if self.disk_template in constants.DTS_NET_MIRROR:
9110
      self.required_nodes = 2
9111
    else:
9112
      self.required_nodes = 1
9113
    request = {
9114
      "name": self.name,
9115
      "disk_template": self.disk_template,
9116
      "tags": self.tags,
9117
      "os": self.os,
9118
      "vcpus": self.vcpus,
9119
      "memory": self.mem_size,
9120
      "disks": self.disks,
9121
      "disk_space_total": disk_space,
9122
      "nics": self.nics,
9123
      "required_nodes": self.required_nodes,
9124
      }
9125
    return request
9126
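  # Editor's note: illustrative sketch, not part of this module.  Building an
  # allocation request means passing every key in _ALLO_KEYS to the
  # constructor; the values below are made up and "hail" is only an example
  # allocator name (cfg/rpc stand for the objects an LU already holds):
  #
  #   ial = IAllocator(cfg, rpc, mode=constants.IALLOCATOR_MODE_ALLOC,
  #                    name="new-instance.example.com", mem_size=1024,
  #                    disks=[{"size": 10240, "mode": "w"}],
  #                    disk_template=constants.DT_DRBD8, os="debian-image",
  #                    tags=[], vcpus=1, hypervisor=None,
  #                    nics=[{"mac": "auto", "ip": None, "bridge": None}])
  #   ial.Run("hail")
  #   if ial.success:
  #     chosen_nodes = ial.result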

    
9127
  def _AddRelocateInstance(self):
9128
    """Add relocate instance data to allocator structure.
9129

9130
    This in combination with _ComputeClusterData will create the
9131
    correct structure needed as input for the allocator.
9132

9133
    The checks for the completeness of the opcode must have already been
9134
    done.
9135

9136
    """
9137
    instance = self.cfg.GetInstanceInfo(self.name)
9138
    if instance is None:
9139
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
9140
                                   " IAllocator" % self.name)
9141

    
9142
    if instance.disk_template not in constants.DTS_NET_MIRROR:
9143
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9144
                                 errors.ECODE_INVAL)
9145

    
9146
    if len(instance.secondary_nodes) != 1:
9147
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
9148
                                 errors.ECODE_STATE)
9149

    
9150
    self.required_nodes = 1
9151
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
9152
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9153

    
9154
    request = {
9155
      "name": self.name,
9156
      "disk_space_total": disk_space,
9157
      "required_nodes": self.required_nodes,
9158
      "relocate_from": self.relocate_from,
9159
      }
9160
    return request
9161

    
9162
  def _AddEvacuateNodes(self):
9163
    """Add evacuate nodes data to allocator structure.
9164

9165
    """
9166
    request = {
9167
      "evac_nodes": self.evac_nodes
9168
      }
9169
    return request
9170

    
9171
  def _BuildInputData(self, fn):
9172
    """Build input data structures.
9173

9174
    """
9175
    self._ComputeClusterData()
9176

    
9177
    request = fn()
9178
    request["type"] = self.mode
9179
    self.in_data["request"] = request
9180

    
9181
    self.in_text = serializer.Dump(self.in_data)
9182

    
9183
  def Run(self, name, validate=True, call_fn=None):
9184
    """Run an instance allocator and return the results.
9185

9186
    """
9187
    if call_fn is None:
9188
      call_fn = self.rpc.call_iallocator_runner
9189

    
9190
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9191
    result.Raise("Failure while running the iallocator script")
9192

    
9193
    self.out_text = result.payload
9194
    if validate:
9195
      self._ValidateResult()
9196

    
9197
  def _ValidateResult(self):
9198
    """Process the allocator results.
9199

9200
    This will process and if successful save the result in
9201
    self.out_data and the other parameters.
9202

9203
    """
9204
    try:
9205
      rdict = serializer.Load(self.out_text)
9206
    except Exception, err:
9207
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9208

    
9209
    if not isinstance(rdict, dict):
9210
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
9211

    
9212
    # TODO: remove backwards compatibility in later versions
9213
    if "nodes" in rdict and "result" not in rdict:
9214
      rdict["result"] = rdict["nodes"]
9215
      del rdict["nodes"]
9216

    
9217
    for key in "success", "info", "result":
9218
      if key not in rdict:
9219
        raise errors.OpExecError("Can't parse iallocator results:"
9220
                                 " missing key '%s'" % key)
9221
      setattr(self, key, rdict[key])
9222

    
9223
    if not isinstance(rdict["result"], list):
9224
      raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9225
                               " is not a list")
9226
    self.out_data = rdict
9227
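# Editor's note: illustrative sketch, not part of cmdlib.py.  A well-formed
# reply from the external allocator, as accepted by _ValidateResult above, is
# a JSON object with at least "success", "info" and "result" (a legacy
# "nodes" key is renamed to "result").  A standalone version of those shape
# checks (hypothetical helper):
def _sketch_validate_ialloc_reply(rdict):
  if not isinstance(rdict, dict):
    raise ValueError("reply is not a dict")
  if "nodes" in rdict and "result" not in rdict:
    rdict["result"] = rdict.pop("nodes")  # backwards compatibility
  for key in ("success", "info", "result"):
    if key not in rdict:
      raise ValueError("missing key '%s'" % key)
  if not isinstance(rdict["result"], list):
    raise ValueError("'result' is not a list")
  return rdict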

    
9228

    
9229
class LUTestAllocator(NoHooksLU):
9230
  """Run allocator tests.
9231

9232
  This LU runs the allocator tests
9233

9234
  """
9235
  _OP_REQP = ["direction", "mode", "name"]
9236

    
9237
  def CheckPrereq(self):
9238
    """Check prerequisites.
9239

9240
    This checks the opcode parameters depending on the direction and mode.
9241

9242
    """
9243
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9244
      for attr in ["name", "mem_size", "disks", "disk_template",
9245
                   "os", "tags", "nics", "vcpus"]:
9246
        if not hasattr(self.op, attr):
9247
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9248
                                     attr, errors.ECODE_INVAL)
9249
      iname = self.cfg.ExpandInstanceName(self.op.name)
9250
      if iname is not None:
9251
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
9252
                                   iname, errors.ECODE_EXISTS)
9253
      if not isinstance(self.op.nics, list):
9254
        raise errors.OpPrereqError("Invalid parameter 'nics'",
9255
                                   errors.ECODE_INVAL)
9256
      for row in self.op.nics:
9257
        if (not isinstance(row, dict) or
9258
            "mac" not in row or
9259
            "ip" not in row or
9260
            "bridge" not in row):
9261
          raise errors.OpPrereqError("Invalid contents of the 'nics'"
9262
                                     " parameter", errors.ECODE_INVAL)
9263
      if not isinstance(self.op.disks, list):
9264
        raise errors.OpPrereqError("Invalid parameter 'disks'",
9265
                                   errors.ECODE_INVAL)
9266
      for row in self.op.disks:
9267
        if (not isinstance(row, dict) or
9268
            "size" not in row or
9269
            not isinstance(row["size"], int) or
9270
            "mode" not in row or
9271
            row["mode"] not in ['r', 'w']):
9272
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
9273
                                     " parameter", errors.ECODE_INVAL)
9274
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
9275
        self.op.hypervisor = self.cfg.GetHypervisorType()
9276
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9277
      if not hasattr(self.op, "name"):
9278
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
9279
                                   errors.ECODE_INVAL)
9280
      fname = _ExpandInstanceName(self.cfg, self.op.name)
9281
      self.op.name = fname
9282
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
9283
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9284
      if not hasattr(self.op, "evac_nodes"):
9285
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
9286
                                   " opcode input", errors.ECODE_INVAL)
9287
    else:
9288
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
9289
                                 self.op.mode, errors.ECODE_INVAL)
9290

    
9291
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
9292
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
9293
        raise errors.OpPrereqError("Missing allocator name",
9294
                                   errors.ECODE_INVAL)
9295
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
9296
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
9297
                                 self.op.direction, errors.ECODE_INVAL)
9298

    
9299
  def Exec(self, feedback_fn):
9300
    """Run the allocator test.
9301

9302
    """
9303
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9304
      ial = IAllocator(self.cfg, self.rpc,
9305
                       mode=self.op.mode,
9306
                       name=self.op.name,
9307
                       mem_size=self.op.mem_size,
9308
                       disks=self.op.disks,
9309
                       disk_template=self.op.disk_template,
9310
                       os=self.op.os,
9311
                       tags=self.op.tags,
9312
                       nics=self.op.nics,
9313
                       vcpus=self.op.vcpus,
9314
                       hypervisor=self.op.hypervisor,
9315
                       )
9316
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
9317
      ial = IAllocator(self.cfg, self.rpc,
9318
                       mode=self.op.mode,
9319
                       name=self.op.name,
9320
                       relocate_from=list(self.relocate_from),
9321
                       )
9322
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
9323
      ial = IAllocator(self.cfg, self.rpc,
9324
                       mode=self.op.mode,
9325
                       evac_nodes=self.op.evac_nodes)
9326
    else:
9327
      raise errors.ProgrammerError("Uncatched mode %s in"
9328
                                   " LUTestAllocator.Exec", self.op.mode)
9329

    
9330
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
9331
      result = ial.in_text
9332
    else:
9333
      ial.Run(self.op.allocator, validate=False)
9334
      result = ial.out_text
9335
    return result