Statistics
| Branch: | Tag: | Revision:

root / lib / cmdlib.py @ 94a1b377

History | View | Annotate | Download (310.5 kB)

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0201
25

    
26
# W0201 since most LU attributes are defined in CheckPrereq or similar
27
# functions
28

    
29
import os
30
import os.path
31
import time
32
import re
33
import platform
34
import logging
35
import copy
36

    
37
from ganeti import ssh
38
from ganeti import utils
39
from ganeti import errors
40
from ganeti import hypervisor
41
from ganeti import locking
42
from ganeti import constants
43
from ganeti import objects
44
from ganeti import serializer
45
from ganeti import ssconf
46

    
47

    
48
class LogicalUnit(object):
  """Base class of all logical units (LUs).

  A concrete LU has to:
    - implement ExpandNames
    - implement CheckPrereq (unless it is built from tasklets)
    - implement Exec (unless it is built from tasklets)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine its run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Set up the common LU state and validate the opcode.

    Every attribute listed in C{_OP_REQP} must be present on the opcode
    with a value other than None; once that holds, L{CheckArguments} is
    called so that derived classes can run their own syntactic checks.

    @raise errors.OpPrereqError: if a required opcode parameter is missing

    """
    self.proc = processor
    self.op = op
    self.context = context
    self.cfg = context.cfg
    self.rpc = rpc

    # Locking declarations, read by mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Must be filled before the lock helper functions may be used
    self.recalculate_locks = {}

    # The SSH runner is only built on first access of the "ssh" property
    self.__ssh = None

    # Logging shortcuts borrowed from the processor
    self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
    self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
    self.LogStep = processor.LogStep # pylint: disable-msg=C0103

    # Result handed back to the caller in dry-run mode
    self.dry_run_result = None

    # Optional list of tasklets, see ExpandNames
    self.tasklets = None

    for required in self._OP_REQP:
      if getattr(op, required, None) is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   required, errors.ECODE_INVAL)

    self.CheckArguments()

  def __GetSSH(self):
    """Return the SshRunner object, creating it on first use.

    """
    runner = self.__ssh
    if not runner:
      runner = self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return runner

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity of the opcode arguments.

    This hook is meant for simple syntactic checks of the opcode
    parameters that need no cluster-related information. The same could
    be done in ExpandNames and/or CheckPrereq, but keeping it separate is
    better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    The default implementation does nothing.

    """

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it
    should update all the parameters of the opcode to their canonical form
    (e.g. a short node name must be fully expanded after this method has
    successfully completed). This way locking, hooks, logging, etc. can
    work correctly.

    LUs which implement this method must also populate the
    self.needed_locks member, as a dict with lock levels as keys, and a
    list of needed lock names as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at
    one level you can modify self.share_locks, setting a true value
    (usually 1) for that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # Only concurrent LUs are forced to provide their own implementation;
    # LUs running under the BGL keep working unchanged.
    if not self.REQ_BGL:
      raise NotImplementedError
    self.needed_locks = {} # Exclusive LUs don't need locks.

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level.

    Most LUs can declare everything they need in ExpandNames, but some
    locks can only be computed once the locks of the previous levels are
    held. This function is called right before the locks of a level are
    acquired (and after the lower levels have been acquired) and may
    modify self.needed_locks. By default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should verify that everything needed for executing the LU
    is in place. It can do internode communication, but it should be
    idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    The default implementation delegates to the tasklets, in order.

    @raise NotImplementedError: if the LU has no tasklets and does not
        override this method

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (pos, tasklet) in enumerate(self.tasklets):
      logging.debug("Checking prerequisites for tasklet %s/%s",
                    pos + 1, len(self.tasklets))
      tasklet.CheckPrereq()

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should do the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    The default implementation runs the tasklets, in order.

    @raise NotImplementedError: if the LU has no tasklets and does not
        override this method

    """
    if self.tasklets is None:
      raise NotImplementedError

    for (pos, tasklet) in enumerate(self.tasklets):
      logging.debug("Executing tasklet %s/%s", pos + 1, len(self.tasklets))
      tasklet.Exec(feedback_fn)

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed and tells
    the Logical Unit about the hooks' result, so that the LU can alter its
    own result accordingly. The default implementation hands the previous
    result back unchanged; any LU that wants to use the local cluster
    hook-scripts somehow can override it.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # The signature is part of the API, hence the unused arguments and the
    # "could be a function" warning are silenced
    # pylint: disable-msg=W0613,R0201
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in
    self.op.instance_name and need to expand it and then declare the
    expanded name for locking. This function does that and stores the
    expanded name back into self.op.instance_name. It also initializes
    needed_locks as a dict, if this hasn't been done before.

    @raise errors.OpPrereqError: if the instance name cannot be expanded

    """
    if self.needed_locks is not None:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    else:
      self.needed_locks = {}

    full_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if full_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name, errors.ECODE_NOENT)

    self.needed_locks[locking.LEVEL_INSTANCE] = full_name
    self.op.instance_name = full_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to
    lock their nodes. It fills self.needed_locks[locking.LEVEL_NODE] with
    the primary (and, unless primary_only is set, secondary) nodes of all
    instances listed in self.acquired_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's
    nodes, or to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    node_level = locking.LEVEL_NODE
    assert node_level in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we're really been called with the instance locks held

    collected = []
    for inst_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      inst = self.context.cfg.GetInstanceInfo(inst_name)
      collected.append(inst.primary_node)
      if primary_only:
        continue
      collected.extend(inst.secondary_nodes)

    # The recalculation mode decides whether the node list is replaced or
    # extended; any other value leaves needed_locks untouched
    mode = self.recalculate_locks[node_level]
    if mode == constants.LOCKS_REPLACE:
      self.needed_locks[node_level] = collected
    elif mode == constants.LOCKS_APPEND:
      self.needed_locks[node_level].extend(collected)

    del self.recalculate_locks[node_level]
354

    
355

    
356
class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None

  def BuildHooksEnv(self):
    """Empty BuildHooksEnv for NoHooksLu.

    This just raises an error.

    @raise AssertionError: always

    """
    # Raise explicitly instead of using "assert False": assert statements
    # are removed when Python runs with -O, which would turn this method
    # into a silent no-op returning None.
    raise AssertionError("BuildHooksEnv called for NoHooksLUs")
373

    
374

    
375
class Tasklet:
  """Base class for tasklets.

  A tasklet is a building block of an LU. An LU may be composed only of
  tasklets, or combine them with legacy code. Tasklets know nothing about
  locks; all locking has to be done by the owning LU.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    """Remember the owning LU and expose its config and RPC runner.

    @param lu: the logical unit this tasklet belongs to

    """
    self.lu = lu

    # Convenience aliases so subclasses need not go through self.lu
    self.cfg, self.rpc = lu.cfg, lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should verify that everything needed for executing the
    tasklet is in place. It can do internode communication, but it should
    be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form
    if it hasn't been done before.

    @raise NotImplementedError: always, subclasses must override

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should do the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code,
    or expected.

    @raise NotImplementedError: always, subclasses must override

    """
    raise NotImplementedError
419

    
420

    
421
def _GetWantedNodes(lu, nodes):
  """Expands and validates a list of node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: non-empty list of node names to expand
  @rtype: list
  @return: the expanded node names, ordered by L{utils.NiceSort}
  @raise errors.OpPrereqError: if the nodes parameter is not a list or
      if any of the passed names cannot be expanded
  @raise errors.ProgrammerError: if the nodes parameter is an empty list

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'",
                               errors.ECODE_INVAL)

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  expanded = []
  for given_name in nodes:
    full_name = lu.cfg.ExpandNodeName(given_name)
    if full_name is None:
      raise errors.OpPrereqError("No such node name '%s'" % given_name,
                                 errors.ECODE_NOENT)
    expanded.append(full_name)

  return utils.NiceSort(expanded)
450

    
451

    
452
def _GetWantedInstances(lu, instances):
  """Expands and validates a list of instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names; an empty list selects all
      instances known to the configuration
  @rtype: list
  @return: the expanded names in the order they were passed in, or, for
      an empty input list, all instance names ordered by L{utils.NiceSort}
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'",
                               errors.ECODE_INVAL)

  if not instances:
    # Nothing was requested explicitly, so return everything
    return utils.NiceSort(lu.cfg.GetInstanceList())

  expanded = []
  for given_name in instances:
    full_name = lu.cfg.ExpandInstanceName(given_name)
    if full_name is None:
      raise errors.OpPrereqError("No such instance name '%s'" % given_name,
                                 errors.ECODE_NOENT)
    expanded.append(full_name)

  return expanded
482

    
483

    
484
def _CheckOutputFields(static, dynamic, selected):
  """Verifies that every selected field is a known one.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set
  @param selected: the fields requested by the caller
  @raise errors.OpPrereqError: if any selected field matches neither the
      static nor the dynamic set

  """
  known = utils.FieldSet()
  for field_set in (static, dynamic):
    known.Extend(field_set)

  unknown = known.NonMatching(selected)
  if not unknown:
    return

  raise errors.OpPrereqError("Unknown output fields selected: %s"
                             % ",".join(unknown), errors.ECODE_INVAL)
501

    
502

    
503
def _CheckBooleanOpField(op, name):
  """Validates a boolean opcode parameter.

  After this call the attribute is guaranteed to exist on the opcode and
  to hold either a boolean or None (None is stored if it was missing).

  @param op: the opcode to check (and possibly update)
  @type name: string
  @param name: name of the opcode attribute
  @raise errors.OpPrereqError: if the attribute holds anything other than
      a boolean or None

  """
  value = getattr(op, name, None)
  if value is not None and not isinstance(value, bool):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(value)), errors.ECODE_INVAL)
  # Store the value back so that a missing attribute now exists as None
  setattr(op, name, value)
515

    
516

    
517
def _CheckGlobalHvParams(params):
  """Validates that given hypervisor params are not global ones.

  Global parameters may only be set at cluster level, so instances must
  not carry customised versions of them.

  @param params: the hypervisor parameter names to check
  @raise errors.OpPrereqError: if any of the names is listed in
      L{constants.HVC_GLOBALS}

  """
  offending = constants.HVC_GLOBALS.intersection(params)
  if not offending:
    return

  template = ("The following hypervisor parameters are global and cannot"
              " be customized at instance level, please modify them at"
              " cluster level: %s")
  raise errors.OpPrereqError(template % utils.CommaJoin(offending),
                             errors.ECODE_INVAL)
530

    
531

    
532
def _CheckNodeOnline(lu, node):
  """Makes sure the given node is not marked offline.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.offline:
    return
  raise errors.OpPrereqError("Can't use offline node %s" % node,
                             errors.ECODE_INVAL)
543

    
544

    
545
def _CheckNodeNotDrained(lu, node):
  """Makes sure the given node is not marked drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  node_info = lu.cfg.GetNodeInfo(node)
  if not node_info.drained:
    return
  raise errors.OpPrereqError("Can't use drained node %s" % node,
                             errors.ECODE_INVAL)
556

    
557

    
558
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  The environment is assembled from the individual values passed in.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  str_status = "down"
  if status:
    str_status = "up"

  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  # A missing or empty NIC list simply yields a count of zero
  nic_list = nics or []
  for (nic_idx, (ip, mac, mode, link)) in enumerate(nic_list):
    if ip is None:
      ip = ""
    env["INSTANCE_NIC%d_IP" % nic_idx] = ip
    env["INSTANCE_NIC%d_MAC" % nic_idx] = mac
    env["INSTANCE_NIC%d_MODE" % nic_idx] = mode
    env["INSTANCE_NIC%d_LINK" % nic_idx] = link
    # For bridged NICs the link is additionally exported as the bridge
    if mode == constants.NIC_MODE_BRIDGED:
      env["INSTANCE_NIC%d_BRIDGE" % nic_idx] = link
  env["INSTANCE_NIC_COUNT"] = len(nic_list)

  # Same treatment for the disks
  disk_list = disks or []
  for (disk_idx, (size, mode)) in enumerate(disk_list):
    env["INSTANCE_DISK%d_SIZE" % disk_idx] = size
    env["INSTANCE_DISK%d_MODE" % disk_idx] = mode
  env["INSTANCE_DISK_COUNT"] = len(disk_list)

  # Export every backend and hypervisor parameter under its own prefix
  for (kind, params) in (("BE", bep), ("HV", hvp)):
    for (key, value) in params.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env
644

    
645

    
646
def _NICListToTuple(lu, nics):
  """Converts NIC objects into (ip, mac, mode, link) tuples.

  The resulting list can be handed to _BuildInstanceHookEnv or used as a
  return value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples
  @rtype: list
  @return: one (ip, mac, mode, link) tuple per NIC, in input order

  """
  cluster_defaults = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]

  result = []
  for nic in nics:
    # Mode and link come from the NIC's own parameters filled up with the
    # cluster-level defaults
    filled = objects.FillDict(cluster_defaults, nic.nicparams)
    result.append((nic.ip, nic.mac,
                   filled[constants.NIC_MODE], filled[constants.NIC_LINK]))
  return result
668

    
669

    
670
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  be_full = cluster.FillBE(instance)
  hv_full = cluster.FillHV(instance)

  # Keyword arguments for _BuildInstanceHookEnv, derived from the instance
  kwargs = dict(
    name=instance.name,
    primary_node=instance.primary_node,
    secondary_nodes=instance.secondary_nodes,
    os_type=instance.os,
    status=instance.admin_up,
    memory=be_full[constants.BE_MEMORY],
    vcpus=be_full[constants.BE_VCPUS],
    nics=_NICListToTuple(lu, instance.nics),
    disk_template=instance.disk_template,
    disks=[(disk.size, disk.mode) for disk in instance.disks],
    bep=be_full,
    hvp=hv_full,
    hypervisor_name=instance.hypervisor,
    )

  # Caller-supplied values win over the ones taken from the instance
  if override:
    kwargs.update(override)

  return _BuildInstanceHookEnv(**kwargs) # pylint: disable-msg=W0142
706

    
707

    
708
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Asks the configuration to maintain the candidate pool, re-adds every
  node it returns to the context, and logs a note if there are more
  master candidates than desired.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed through to C{MaintainCandidatePool} and
      C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               utils.CommaJoin(node.name for node in promoted))
    for promoted_node in promoted:
      lu.context.ReaddNode(promoted_node)

  (current, desired, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if current > desired:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (current, desired))
722

    
723

    
724
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed through to C{GetMasterCandidateStats}
  @rtype: boolean
  @return: True if the current number of candidates is below the desired
      number once one more node is accounted for

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  (current, desired, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  # The new node raises the desired count by one, but never beyond the
  # configured candidate pool size
  target = min(desired + 1, pool_size)
  return current < target
733

    
734

    
735
def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters are in bridged mode are considered;
  if there are none, no RPC call is made.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges must exist
  @param profile: key into the cluster nicparams used as defaults
  @raise errors.OpPrereqError: presumably raised by the RPC result's
      C{Raise} (called with prereq=True) when the check fails

  """
  cluster_defaults = lu.cfg.GetClusterInfo().nicparams[profile]

  bridges = []
  for nic in target_nics:
    filled = objects.FillDict(cluster_defaults, nic.nicparams)
    if filled[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(filled[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
749

    
750

    
751
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node when None

  """
  target_node = node
  if target_node is None:
    target_node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target_node)
758

    
759

    
760
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  Nothing is checked for an OS without supported variants. Otherwise the
  name must have the form C{os+variant}, where the variant is everything
  after the first C{+}.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name carries no variant or the
      variant is not among the supported ones

  """
  if not os_obj.supported_variants:
    return

  pieces = name.split("+", 1)
  if len(pieces) < 2:
    raise errors.OpPrereqError("OS name must include a variant",
                               errors.ECODE_INVAL)

  if pieces[1] not in os_obj.supported_variants:
    raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
779

    
780

    
781
def _GetNodeInstancesInner(cfg, fn):
  """Returns the instance objects accepted by a predicate.

  @param cfg: the configuration object to query for all instances
  @param fn: callable taking an instance object; the instance is kept
      if it returns a true value
  @rtype: list
  @return: the matching instance objects

  """
  selected = []
  for inst in cfg.GetAllInstancesInfo().values():
    if fn(inst):
      selected.append(inst)
  return selected
783

    
784

    
785
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the configuration object to query
  @param node_name: the node to look for in each instance's all_nodes
  @rtype: list
  @return: the matching instance objects

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
791

    
792

    
793
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the configuration object to query
  @param node_name: the node that must be the instance's primary node
  @rtype: list
  @return: the matching instance objects

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
799

    
800

    
801
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: the configuration object to query
  @param node_name: the node that must be among the instance's
      secondary nodes
  @rtype: list
  @return: the matching instance objects

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
807

    
808

    
809
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the configuration object, queried for the file storage dir
  @param storage_type: the storage type to build arguments for
  @rtype: list
  @return: an empty list, except for L{constants.ST_FILE} where it holds
      a single element: the list of storage directories

  """
  if storage_type != constants.ST_FILE:
    return []

  # File storage is the special case: storage.FileStorage wants a list
  # of storage directories
  return [[cfg.GetFileStorageDir()]]
819

    
820

    
821
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Returns the indices of an instance's disks that are faulty on a node.

  The disk IDs are first set for the given node, then the mirror status
  of all disks is queried from that node in a single RPC call.

  @param cfg: the configuration object, used to set the disk IDs
  @param rpc: the RPC runner used for the status query
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed through to the RPC result's C{Raise}
  @rtype: list
  @return: positions in the returned payload whose status is set and
      reports L{constants.LDS_FAULTY} as local disk status

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # Payload entries evaluating to false are skipped
  return [idx for (idx, bdev_status) in enumerate(result.payload)
          if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY]
836

    
837

    
838
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The LU itself performs no work; it only provides the hooks environment.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: tuple
    @return: the environment (holding only the cluster name as
        OP_TARGET), an empty pre-hooks node list, and a post-hooks node
        list with just the master node

    """
    hooks_env = {"OP_TARGET": self.cfg.GetClusterName()}
    pre_nodes = []
    post_nodes = [self.cfg.GetMasterNode()]
    return (hooks_env, pre_nodes, post_nodes)

  def CheckPrereq(self):
    """No prerequisites to check.

    @return: always True

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    @return: always True

    """
    return True
865

    
866

    
867
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: tuple of the environment dictionary (holding only
        C{OP_TARGET}, the cluster name) and two empty lists

    """
    hooks_env = {}
    hooks_env["OP_TARGET"] = self.cfg.GetClusterName()
    return (hooks_env, [], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty, i.e. the master is the
    only node left and no instances are defined.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master_name = self.cfg.GetMasterNode()

    all_nodes = self.cfg.GetNodeList()
    only_master_left = (len(all_nodes) == 1 and all_nodes[0] == master_name)
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(all_nodes) - 1),
                                 errors.ECODE_INVAL)

    remaining_instances = self.cfg.GetInstanceList()
    if remaining_instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(remaining_instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master_name = self.cfg.GetMasterNode()
    cluster_info = self.cfg.GetClusterInfo()
    modify_ssh_setup = cluster_info.modify_ssh_setup

    # Run post hooks on master node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [master_name])
    except:
      # Hook failures are only logged and must not stop the destruction
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master_name)

    stop_result = self.rpc.call_node_stop_master(master_name, False)
    stop_result.Raise("Could not disable the master role")

    if modify_ssh_setup:
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      for key_file in (priv_key, pub_key):
        utils.CreateBackup(key_file)

    return master_name
927

    
928

    
929
class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
  REQ_BGL = False

  # Item types an error can refer to
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # Error codes, each one a tuple of (item type, error code text); these
  # are the values expected as "ecode" by _Error and _ErrorIf
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")

  # Name of the keyword argument selecting the severity of a message,
  # and its possible values
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"
967

    
968
  def ExpandNames(self):
    """Declare the locks needed by the verification.

    All node and all instance locks are requested, and every locking
    level is marked as shared.

    """
    wanted_levels = [locking.LEVEL_NODE, locking.LEVEL_INSTANCE]
    self.needed_locks = dict((level, locking.ALL_SET)
                             for level in wanted_levels)
    self.share_locks = dict((level, 1) for level in locking.LEVELS)
974

    
975
  def _Error(self, ecode, item, msg, *args, **kwargs):
    """Format an error message and send it to the feedback function.

    Based on the opcode's error_codes parameter, either format a
    parseable error code, or a simpler error string.

    This must be called only from Exec and functions called from Exec,
    as it relies on C{self._feedback_fn} having been set.

    @param ecode: tuple of (item type, error code text), i.e. one of the
        E* class constants
    @param item: name of the item the message refers to (may be empty
        or None)
    @param msg: the message, or a format string if C{args} are given
    @param args: values interpolated into C{msg}
    @param kwargs: only the key named by C{ETYPE_FIELD} is read; it
        selects the severity and defaults to C{ETYPE_ERROR}

    """
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    (item_type, code_text) = ecode

    # first complete the msg
    if args:
      msg = msg % args

    # then format the whole message
    if self.op.error_codes:
      fields = (severity, code_text, item_type, item, msg)
      line = "%s:%s:%s:%s:%s" % fields
    else:
      item_suffix = ""
      if item:
        item_suffix = " " + item
      line = "%s: %s%s: %s" % (severity, item_type, item_suffix, msg)

    # and finally report it via the feedback_fn
    self._feedback_fn("  - %s" % line)
1000

    
1001
  def _ErrorIf(self, cond, *args, **kwargs):
    """Log an error message if the passed condition is True.

    The message is also logged, whatever the condition, when the
    opcode's debug_simulate_errors parameter is set.

    @param cond: the condition; evaluated for its truth value
    @param args: positional arguments passed on to L{_Error}
    @param kwargs: keyword arguments passed on to L{_Error}

    """
    triggered = bool(cond) or self.op.debug_simulate_errors
    if triggered:
      self._Error(*args, **kwargs)

    # do not mark the operation as failed for WARN cases only
    severity = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
    if severity == self.ETYPE_ERROR:
      self.bad = self.bad or triggered
1011

    
1012
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, master_files, drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh and tcp connectivity to other nodes
      - checks hypervisor, DRBD minors, node setup and PV names

    Problems are reported through L{_ErrorIf}; nothing is returned.

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name
    report = self._ErrorIf

    # main result, node_result should be a non-empty dict
    has_data = bool(node_result) and isinstance(node_result, dict)
    report(not has_data, self.ENODERPC, node,
           "unable to verify node: no data returned")
    if not has_data:
      return

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    version_usable = (remote_version and
                      isinstance(remote_version, (list, tuple)) and
                      len(remote_version) == 2)
    version_broken = not version_usable
    report(version_broken, self.ENODERPC, node,
           "connection to node returned invalid data")
    if version_broken:
      return

    (remote_protocol, remote_release) = (remote_version[0],
                                         remote_version[1])
    protocol_mismatch = local_version != remote_protocol
    report(protocol_mismatch, self.ENODEVERSION, node,
           "incompatible protocol versions: master %s,"
           " node %s", local_version, remote_protocol)
    if protocol_mismatch:
      return

    # node seems compatible, we can actually try to look into its results

    # full package version; a mismatch here is only a warning
    report(constants.RELEASE_VERSION != remote_release,
           self.ENODEVERSION, node,
           "software version mismatch: master %s, node %s",
           constants.RELEASE_VERSION, remote_release,
           code=self.ETYPE_WARNING)

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      no_vglist = not vglist
      report(no_vglist, self.ENODELVM, node, "unable to check volume groups")
      if not no_vglist:
        # the status doubles as condition and as message text
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        report(vgstatus, self.ENODELVM, node, vgstatus)

    # checks config file checksum
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    no_cksums = not isinstance(remote_cksum, dict)
    report(no_cksums, self.ENODEFILECHECK, node,
           "node hasn't returned file checksum data")
    if not no_cksums:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have = (file_name not in master_files) or node_is_mc
        if file_name in remote_cksum:
          missing = False
          # present, but with a checksum differing from the local one
          outdated = remote_cksum[file_name] != local_cksum[file_name]
          # present and matching the local checksum
          current = remote_cksum[file_name] == local_cksum[file_name]
        else:
          missing = True
          outdated = False
          current = False
        report(missing and must_have, self.ENODEFILECHECK, node,
               "file '%s' missing", file_name)
        report(outdated and must_have, self.ENODEFILECHECK, node,
               "file '%s' has wrong checksum", file_name)
        # not candidate and this is not a must-have file
        report(outdated and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist on non master"
               " candidates (and the file is outdated)", file_name)
        # all good, except non-master/non-must have combination
        report(current and not must_have, self.ENODEFILECHECK, node,
               "file '%s' should not exist"
               " on non master candidates", file_name)

    # checks ssh to any
    no_ssh_data = constants.NV_NODELIST not in node_result
    report(no_ssh_data, self.ENODESSH, node,
           "node hasn't returned node ssh connectivity data")
    if not no_ssh_data and node_result[constants.NV_NODELIST]:
      # every entry returned describes a failed connection
      for a_node, a_msg in node_result[constants.NV_NODELIST].items():
        report(True, self.ENODESSH, node,
               "ssh communication with node '%s': %s", a_node, a_msg)

    no_net_data = constants.NV_NODENETTEST not in node_result
    report(no_net_data, self.ENODENET, node,
           "node hasn't returned node tcp connectivity data")
    if not no_net_data and node_result[constants.NV_NODENETTEST]:
      net_failures = node_result[constants.NV_NODENETTEST]
      for anode in utils.NiceSort(net_failures.keys()):
        report(True, self.ENODENET, node,
               "tcp communication with node '%s': %s",
               anode, net_failures[anode])

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        # anything but None is a failure description
        report(hv_result is not None, self.ENODEHV, node,
               "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      bad_minors = not isinstance(used_minors, (tuple, list))
      report(bad_minors, self.ENODEDRBD, node,
             "cannot parse drbd status file: %s", str(used_minors))
      if not bad_minors:
        for minor, (iname, must_exist) in drbd_map.items():
          inactive = minor not in used_minors and must_exist
          report(inactive, self.ENODEDRBD, node,
                 "drbd minor %d of instance %s is not active",
                 minor, iname)
        for minor in used_minors:
          report(minor not in drbd_map, self.ENODEDRBD, node,
                 "unallocated drbd minor %d is in use", minor)

    # a missing result is reported through the default value
    setup_errors = node_result.get(constants.NV_NODESETUP,
                                   ["Missing NODESETUP results"])
    report(setup_errors, self.ENODESETUP, node, "node setup error: %s",
           "; ".join(setup_errors))

    # check pv names
    if vg_name is not None:
      pvlist = node_result.get(constants.NV_PVLIST, None)
      no_pvlist = pvlist is None
      report(no_pvlist, self.ENODELVM, node, "Can't get PV list from node")
      if not no_pvlist:
        # check that ':' is not present in PV names, since it's a
        # special character for lvcreate (denotes the range of PEs to
        # use on the PV)
        for _, pvname, owner_vg in pvlist:
          report(":" in pvname, self.ENODELVM, node,
                 "Invalid character ':' in PV"
                 " '%s' of VG '%s'", pvname, owner_vg)
1174

    
1175
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's nodes, that an instance marked as up
    runs on its primary node, and that it runs on no other node.

    @param instance: the instance name
    @param instanceconfig: the instance's configuration object
    @param node_vol_is: dictionary of node name to the volumes found on it
    @param node_instance: dictionary of node name to the instances
        found running on it
    @param n_offline: the offline nodes, for which missing data is
        not an error

    """
    report = self._ErrorIf
    primary = instanceconfig.primary_node

    expected_volumes = {}
    instanceconfig.MapLVsByNode(expected_volumes)

    for node, volumes in expected_volumes.items():
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in volumes:
        missing = node not in node_vol_is or volume not in node_vol_is[node]
        report(missing, self.EINSTANCEMISSINGDISK, instance,
               "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      runs_on_primary = (primary in node_instance and
                         instance in node_instance[primary])
      down = not runs_on_primary and primary not in n_offline
      report(down, self.EINSTANCEDOWN, instance,
             "instance not running on its primary node %s",
             primary)

    for node in node_instance:
      if node == primary:
        continue
      report(instance in node_instance[node], self.EINSTANCEWRONGNODE,
             instance, "instance should not run on node %s", node)
1211

    
1212
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
    """Verify if there are any unknown volumes in the cluster.

    Every volume found on a node but not expected there is reported
    as unknown.

    @param node_vol_should: dictionary of node name to expected volumes
    @param node_vol_is: dictionary of node name to volumes found

    """
    for node, found_volumes in node_vol_is.items():
      node_unexpected = node not in node_vol_should
      for volume in found_volumes:
        orphan = node_unexpected or volume not in node_vol_should[node]
        self._ErrorIf(orphan, self.ENODEORPHANLV, node,
                      "volume %s is unknown", volume)
1225

    
1226
  def _VerifyOrphanInstances(self, instancelist, node_instance):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    @param instancelist: the names of the instances known to the cluster
    @param node_instance: dictionary of node name to the instances
        found running on it

    """
    for node, running in node_instance.items():
      for o_inst in running:
        self._ErrorIf(o_inst not in instancelist,
                      self.ENODEORPHANINSTANCE, node,
                      "instance %s on node %s should not exist", o_inst, node)
1237

    
1238
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    @param node_info: dictionary of node name to a dictionary holding at
        least the keys C{mfree} (free memory) and C{sinst-by-pnode}
        (secondary instances grouped by their primary node)
    @param instance_cfg: dictionary of instance name to its
        configuration object

    """
    # The cluster object does not depend on the node or instance at hand
    cluster = self.cfg.GetClusterInfo()

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = cluster.FillBE(instance_cfg[instance])
          # only auto-balanced instances count towards the needed memory
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        test = nodeinfo['mfree'] < needed_mem
        self._ErrorIf(test, self.ENODEN1, node,
                      "not enough memory to accommodate"
                      " failovers should peer node %s fail", prinode)
1264

    
1265
  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set (stored
    as C{self.skip_set}) and check that all its members are valid.

    @raise errors.OpPrereqError: if a check not listed in
        L{constants.VERIFY_OPTIONAL_CHECKS} was requested to be skipped

    """
    requested = frozenset(self.op.skip_checks)
    self.skip_set = requested
    if constants.VERIFY_OPTIONAL_CHECKS.issuperset(requested):
      return
    raise errors.OpPrereqError("Invalid checks to be skipped specified",
                               errors.ECODE_INVAL)
1276

    
1277
  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks just ran in the post phase and their failure makes
    the output be logged in the verify output and the verification to fail.

    @return: tuple of the environment dictionary (the cluster tags plus
        one C{NODE_TAGS_<name>} entry per node), an empty list and the
        list of all node names

    """
    all_nodes = self.cfg.GetNodeList()
    cluster_tags = self.cfg.GetClusterInfo().GetTags()

    hooks_env = {}
    hooks_env["CLUSTER_TAGS"] = " ".join(cluster_tags)
    for node in self.cfg.GetAllNodesInfo().values():
      tag_key = "NODE_TAGS_%s" % node.name
      hooks_env[tag_key] = " ".join(node.GetTags())

    return (hooks_env, [], all_nodes)
1292

    
1293
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    The verification data is gathered from all nodes with a single
    C{call_node_verify} RPC; afterwards every node which is not offline
    is checked, then every instance, and finally orphan volumes, orphan
    instances and (unless skipped) the N+1 memory redundancy.

    @param feedback_fn: function used to send feedback back to the caller
    @rtype: boolean
    @return: True if no error was recorded (warnings don't count),
        False otherwise

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      }

    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Verifying node status")
    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        test = instance not in instanceinfo
        _ErrorIf(test, self.ECLUSTERCFG, None,
                 "ghost instance '%s' in temporary DRBD map", instance)
        # ghost instance should not be running, but otherwise we
        # don't give double warnings (both ghost instance and
        # unallocated minor in use)
        if test:
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)

      self._VerifyNode(node_i, file_names, local_checksums,
                       nresult, master_files, node_drbd, vg_name)

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        # a string is an error message, not volume data
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
                 utils.SafeEncode(lvdata))
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      test = not isinstance(idata, list)
      _ErrorIf(test, self.ENODEHV, node,
               "rpc call to node failed (instancelist)")
      if test:
        continue

      node_instance[node] = idata

      # node_info; a separate name is used so that the list of node
      # objects being iterated over is not shadowed
      hv_info = nresult.get(constants.NV_HVINFO, None)
      test = not isinstance(hv_info, dict)
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
      if test:
        continue

      # Node time
      ntime = nresult.get(constants.NV_TIME, None)
      try:
        ntime_merged = utils.MergeTime(ntime)
      except (ValueError, TypeError):
        # Without a usable timestamp the clock skew cannot be computed,
        # so report the problem and skip the rest of the node's checks
        _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
        continue

      # The difference is formatted here (and not by _ErrorIf) because
      # the message is also built, with a None value, when errors are
      # being simulated
      if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%0.1fs" % abs(nvinfo_starttime - ntime_merged)
      elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
        ntime_diff = "%0.1fs" % abs(ntime_merged - nvinfo_endtime)
      else:
        ntime_diff = None

      _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
               "Node time diverges by at least %s from master node time",
               ntime_diff)

      if ntime_diff is not None:
        continue

      try:
        node_info[node] = {
          "mfree": int(hv_info['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          test = (constants.NV_VGLIST not in nresult or
                  vg_name not in nresult[constants.NV_VGLIST])
          _ErrorIf(test, self.ENODELVM, node,
                   "node didn't return data for the volume group '%s'"
                   " - it is either missing or broken", vg_name)
          if test:
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        _ErrorIf(True, self.ENODERPC, node,
                 "node returned invalid nodeinfo, check lvm/hypervisor")
        continue

    node_vol_should = {}

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes",
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad
1576

    
1577
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    The Exec result is a boolean with True meaning success, so a hook
    failure is recorded by turning the result into False. Results of
    phases other than the post phase are passed through unchanged.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase != constants.HOOKS_PHASE_POST:
      return lu_result

    # Used to change hooks' output to proper indentation
    indent_re = re.compile('^', re.M)
    feedback_fn("* Hooks Results")
    assert hooks_results, "invalid result from hooks"

    for node_name in hooks_results:
      res = hooks_results[node_name]
      msg = res.fail_msg
      # failures of offline nodes are expected and thus not errors
      test = msg and not res.offline
      self._ErrorIf(test, self.ENODEHOOKS, node_name,
                    "Communication failure in hooks execution: %s", msg)
      if res.offline or msg:
        # No need to investigate payload if node is offline or gave an error.
        # override manually lu_result here as _ErrorIf only
        # overrides self.bad
        if test:
          lu_result = False
        continue
      for script, hkr, output in res.payload:
        test = hkr == constants.HKR_FAIL
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Script %s failed, output:", script)
        if test:
          output = indent_re.sub('      ', output)
          feedback_fn("%s" % output)
          lu_result = False

    return lu_result
1622

    
1623

    
1624
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks needed by this LU.

    The complete node and instance sets are requested, and every
    locking level is flagged in C{share_locks}.

    """
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing needs to be checked for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances which are marked up and whose disk template is in
    L{constants.DTS_NET_MIRROR} are examined.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    node_errors = {}
    need_activation = []
    missing_lvs = {}
    outcome = (node_errors, need_activation, missing_lvs)

    vg_name = self.cfg.GetVGName()
    node_names = utils.NiceSort(self.cfg.GetNodeList())
    all_instances = [self.cfg.GetInstanceInfo(iname)
                     for iname in self.cfg.GetInstanceList()]

    # build the reverse map {(node, volume): instance}
    owner_by_lv = {}
    for instance in all_instances:
      if not instance.admin_up:
        continue
      if instance.disk_template not in constants.DTS_NET_MIRROR:
        continue
      lvs_by_node = {}
      instance.MapLVsByNode(lvs_by_node)
      for node_name, volumes in lvs_by_node.iteritems():
        for volume in volumes:
          owner_by_lv[(node_name, volume)] = instance

    if not owner_by_lv:
      return outcome

    lv_replies = self.rpc.call_lv_list(node_names, vg_name)

    for node_name in node_names:
      reply = lv_replies[node_name]
      if reply.offline:
        continue
      err = reply.fail_msg
      if err:
        logging.warning("Error enumerating LVs on node %s: %s", node_name, err)
        node_errors[node_name] = err
        continue

      for lv_name, (_, _, is_online) in reply.payload.items():
        # every volume reported by the node is removed from the map, so
        # that only the unreported ones are left at the end
        owner = owner_by_lv.pop((node_name, lv_name), None)
        if is_online or owner is None:
          continue
        if owner.name not in need_activation:
          need_activation.append(owner.name)

    # whatever is still in the map was not reported by its node, i.e.
    # it is a missing LV; group those by instance name
    for lv_key, owner in owner_by_lv.iteritems():
      missing_lvs.setdefault(owner.name, []).append(lv_key)

    return outcome
1705

    
1706

    
1707
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare the locks.

    An empty instance list selects all instances (and all nodes);
    otherwise only the named instances are locked and the node locks
    are computed later, in L{DeclareLocks}.

    """
    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if not requested:
      # no selection given: operate on everything
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    else:
      self.wanted_names = []
      for short_name in requested:
        expanded = self.cfg.ExpandInstanceName(short_name)
        if expanded is None:
          raise errors.OpPrereqError("Instance '%s' not known" % short_name,
                                     errors.ECODE_NOENT)
        self.wanted_names.append(expanded)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    """Compute the node locks once the instance locks are known.

    Only done when an explicit instance list was given; the
    C{primary_only} flag is passed to the helper.

    """
    if self.wanted_names is not None and level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This computes the list of instance objects to work on; if no
    explicit names were given, the acquired instance locks are used.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    wanted = []
    for iname in self.wanted_names:
      wanted.append(self.cfg.GetInstanceInfo(iname))
    self.wanted_instances = wanted

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether any size had to be changed, on this disk's first
        child or further down the chain of first children

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    data_child = disk.children[0]
    too_small = data_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   data_child.size, disk.size)
      data_child.size = disk.size

    # only the first child is followed, the metadev is left alone
    return self._EnsureChildSizes(data_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    The disks are grouped by the primary node of their instance, each
    node is queried once for the sizes, and any recorded size differing
    from the reported one is corrected in the configuration.

    @return: list of (instance name, disk index, new size) tuples, one
        per correction made

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    disks_by_node = {}
    for inst in self.wanted_instances:
      node_entries = disks_by_node.setdefault(inst.primary_node, [])
      for disk_idx, disk in enumerate(inst.disks):
        node_entries.append((inst, disk_idx, disk))

    changed = []
    for node, entries in disks_by_node.items():
      # the RPC is made with copies, so that setting the disk ID does
      # not touch the configuration objects
      disk_copies = [entry[2].Copy() for entry in entries]
      for dcopy in disk_copies:
        self.cfg.SetDiskID(dcopy, node)
      reply = self.rpc.call_blockdev_getsizes(node, disk_copies)
      if reply.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(reply.data) != len(entries):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for (inst, disk_idx, disk), raw_size in zip(entries, reply.data):
        if raw_size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", disk_idx, inst.name)
          continue
        if not isinstance(raw_size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", disk_idx, inst.name)
          continue
        # drop the lowest 20 bits (presumably bytes to mebibytes; the
        # unit of the RPC result is not visible here)
        actual = raw_size >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", disk_idx,
                       inst.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(inst, feedback_fn)
          changed.append((inst.name, disk_idx, actual))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(inst, feedback_fn)
          changed.append((inst.name, disk_idx, disk.size))
    return changed
1828

    
1829

    
1830
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    @return: the environment, the master node as the only entry of the
        first node list and all nodes as the second node list

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master = self.cfg.GetMasterNode()
    return env, [master], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved, and the request is refused if neither name
    nor IP would change, or if a changed IP already answers on the
    node daemon port.

    """
    hostinfo = utils.GetHostInfo(self.op.name)

    new_name, new_ip = hostinfo.name, hostinfo.ip
    self.ip = new_ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    ip_changed = new_ip != old_ip

    if not ip_changed and new_name == old_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)
    if ip_changed and utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network. Aborting." %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    # from now on work with the resolved name
    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    The master role is stopped, the configuration and the known hosts
    file are updated and the latter is copied to the other nodes; the
    master role is started again in all cases.

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      known_hosts = constants.SSH_KNOWN_HOSTS_FILE
      ssh.WriteKnownHostsFile(self.cfg, known_hosts)
      other_nodes = self.cfg.GetNodeList()
      if master in other_nodes:
        other_nodes.remove(master)
      uploads = self.rpc.call_upload_file(other_nodes,
                                          constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in uploads.iteritems():
        err = to_result.fail_msg
        if not err:
          continue
        self.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                             (constants.SSH_KNOWN_HOSTS_FILE, to_node, err))

    finally:
      start_result = self.rpc.call_node_start_master(master, False, False)
      err = start_result.fail_msg
      if err:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", err)
1912

    
1913

    
1914
def _RecursiveCheckIfLVMBased(disk):
  """Tell whether a disk, or anything below it, is of type LD_LV.

  The children are examined first (depth-first) and the search stops
  at the first match.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  for child in disk.children or []:
    if _RecursiveCheckIfLVMBased(child):
      return True
  return disk.dev_type == constants.LD_LV
1928

    
1929

    
1930
class LUSetClusterParams(LogicalUnit):
1931
  """Change the parameters of the cluster.
1932

1933
  """
1934
  HPATH = "cluster-modify"
1935
  HTYPE = constants.HTYPE_CLUSTER
1936
  _OP_REQP = []
1937
  REQ_BGL = False
1938

    
1939
  def CheckArguments(self):
1940
    """Check parameters
1941

1942
    """
1943
    if not hasattr(self.op, "candidate_pool_size"):
1944
      self.op.candidate_pool_size = None
1945
    if self.op.candidate_pool_size is not None:
1946
      try:
1947
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1948
      except (ValueError, TypeError), err:
1949
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1950
                                   str(err), errors.ECODE_INVAL)
1951
      if self.op.candidate_pool_size < 1:
1952
        raise errors.OpPrereqError("At least one master candidate needed",
1953
                                   errors.ECODE_INVAL)
1954

    
1955
  def ExpandNames(self):
1956
    # FIXME: in the future maybe other cluster params won't require checking on
1957
    # all nodes to be modified.
1958
    self.needed_locks = {
1959
      locking.LEVEL_NODE: locking.ALL_SET,
1960
    }
1961
    self.share_locks[locking.LEVEL_NODE] = 1
1962

    
1963
  def BuildHooksEnv(self):
1964
    """Build hooks env.
1965

1966
    """
1967
    env = {
1968
      "OP_TARGET": self.cfg.GetClusterName(),
1969
      "NEW_VG_NAME": self.op.vg_name,
1970
      }
1971
    mn = self.cfg.GetMasterNode()
1972
    return env, [mn], [mn]
1973

    
1974
  def CheckPrereq(self):
1975
    """Check prerequisites.
1976

1977
    This checks whether the given params don't conflict and
1978
    if the given volume group is valid.
1979

1980
    """
1981
    if self.op.vg_name is not None and not self.op.vg_name:
1982
      instances = self.cfg.GetAllInstancesInfo().values()
1983
      for inst in instances:
1984
        for disk in inst.disks:
1985
          if _RecursiveCheckIfLVMBased(disk):
1986
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1987
                                       " lvm-based instances exist",
1988
                                       errors.ECODE_INVAL)
1989

    
1990
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1991

    
1992
    # if vg_name not None, checks given volume group on all nodes
1993
    if self.op.vg_name:
1994
      vglist = self.rpc.call_vg_list(node_list)
1995
      for node in node_list:
1996
        msg = vglist[node].fail_msg
1997
        if msg:
1998
          # ignoring down node
1999
          self.LogWarning("Error while gathering data on node %s"
2000
                          " (ignoring node): %s", node, msg)
2001
          continue
2002
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2003
                                              self.op.vg_name,
2004
                                              constants.MIN_VG_SIZE)
2005
        if vgstatus:
2006
          raise errors.OpPrereqError("Error on node '%s': %s" %
2007
                                     (node, vgstatus), errors.ECODE_ENVIRON)
2008

    
2009
    self.cluster = cluster = self.cfg.GetClusterInfo()
2010
    # validate params changes
2011
    if self.op.beparams:
2012
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2013
      self.new_beparams = objects.FillDict(
2014
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
2015

    
2016
    if self.op.nicparams:
2017
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2018
      self.new_nicparams = objects.FillDict(
2019
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
2020
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
2021
      nic_errors = []
2022

    
2023
      # check all instances for consistency
2024
      for instance in self.cfg.GetAllInstancesInfo().values():
2025
        for nic_idx, nic in enumerate(instance.nics):
2026
          params_copy = copy.deepcopy(nic.nicparams)
2027
          params_filled = objects.FillDict(self.new_nicparams, params_copy)
2028

    
2029
          # check parameter syntax
2030
          try:
2031
            objects.NIC.CheckParameterSyntax(params_filled)
2032
          except errors.ConfigurationError, err:
2033
            nic_errors.append("Instance %s, nic/%d: %s" %
2034
                              (instance.name, nic_idx, err))
2035

    
2036
          # if we're moving instances to routed, check that they have an ip
2037
          target_mode = params_filled[constants.NIC_MODE]
2038
          if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2039
            nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2040
                              (instance.name, nic_idx))
2041
      if nic_errors:
2042
        raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2043
                                   "\n".join(nic_errors))
2044

    
2045
    # hypervisor list/parameters
2046
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
2047
    if self.op.hvparams:
2048
      if not isinstance(self.op.hvparams, dict):
2049
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2050
                                   errors.ECODE_INVAL)
2051
      for hv_name, hv_dict in self.op.hvparams.items():
2052
        if hv_name not in self.new_hvparams:
2053
          self.new_hvparams[hv_name] = hv_dict
2054
        else:
2055
          self.new_hvparams[hv_name].update(hv_dict)
2056

    
2057
    if self.op.enabled_hypervisors is not None:
2058
      self.hv_list = self.op.enabled_hypervisors
2059
      if not self.hv_list:
2060
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2061
                                   " least one member",
2062
                                   errors.ECODE_INVAL)
2063
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2064
      if invalid_hvs:
2065
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2066
                                   " entries: %s" %
2067
                                   utils.CommaJoin(invalid_hvs),
2068
                                   errors.ECODE_INVAL)
2069
    else:
2070
      self.hv_list = cluster.enabled_hypervisors
2071

    
2072
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
2073
      # either the enabled list has changed, or the parameters have, validate
2074
      for hv_name, hv_params in self.new_hvparams.items():
2075
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
2076
            (self.op.enabled_hypervisors and
2077
             hv_name in self.op.enabled_hypervisors)):
2078
          # either this is a new hypervisor, or its parameters have changed
2079
          hv_class = hypervisor.GetHypervisor(hv_name)
2080
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2081
          hv_class.CheckParameterSyntax(hv_params)
2082
          _CheckHVParams(self, node_list, hv_name, hv_params)
2083

    
2084
  def Exec(self, feedback_fn):
2085
    """Change the parameters of the cluster.
2086

2087
    """
2088
    if self.op.vg_name is not None:
2089
      new_volume = self.op.vg_name
2090
      if not new_volume:
2091
        new_volume = None
2092
      if new_volume != self.cfg.GetVGName():
2093
        self.cfg.SetVGName(new_volume)
2094
      else:
2095
        feedback_fn("Cluster LVM configuration already in desired"
2096
                    " state, not changing")
2097
    if self.op.hvparams:
2098
      self.cluster.hvparams = self.new_hvparams
2099
    if self.op.enabled_hypervisors is not None:
2100
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2101
    if self.op.beparams:
2102
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2103
    if self.op.nicparams:
2104
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2105

    
2106
    if self.op.candidate_pool_size is not None:
2107
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
2108
      # we need to update the pool size here, otherwise the save will fail
2109
      _AdjustCandidatePool(self, [])
2110

    
2111
    self.cfg.Update(self.cluster, feedback_fn)
2112

    
2113

    
2114
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied. Files which do not exist locally are
  skipped, and a failed copy only results in a warning.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes: everything except the master itself
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  targets = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    targets.extend(additional_nodes)
  try:
    targets.remove(master_info.name)
  except ValueError:
    # the master was not in the list
    pass

  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.HMAC_CLUSTER_KEY,
                   ])

  # each enabled hypervisor can contribute its own files
  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    dist_files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if not os.path.exists(fname):
      continue
    uploads = lu.rpc.call_upload_file(targets, fname)
    for to_node, to_result in uploads.items():
      err = to_result.fail_msg
      if err:
        lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                           (fname, to_node, err))
2156

    
2157

    
2158
class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node locks and flag the node level in share_locks.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    The unmodified cluster object is passed to the configuration
    update, and afterwards the ancillary files are copied as well.

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
2184

    
2185

    
2186
def _WaitForSync(lu, instance, oneshot=False):
2187
  """Sleep and poll for an instance's disk to sync.
2188

2189
  """
2190
  if not instance.disks:
2191
    return True
2192

    
2193
  if not oneshot:
2194
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2195

    
2196
  node = instance.primary_node
2197

    
2198
  for dev in instance.disks:
2199
    lu.cfg.SetDiskID(dev, node)
2200

    
2201
  # TODO: Convert to utils.Retry
2202

    
2203
  retries = 0
2204
  degr_retries = 10 # in seconds, as we sleep 1 second each time
2205
  while True:
2206
    max_time = 0
2207
    done = True
2208
    cumul_degraded = False
2209
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
2210
    msg = rstats.fail_msg
2211
    if msg:
2212
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2213
      retries += 1
2214
      if retries >= 10:
2215
        raise errors.RemoteError("Can't contact node %s for mirror data,"
2216
                                 " aborting." % node)
2217
      time.sleep(6)
2218
      continue
2219
    rstats = rstats.payload
2220
    retries = 0
2221
    for i, mstat in enumerate(rstats):
2222
      if mstat is None:
2223
        lu.LogWarning("Can't compute data for node %s/%s",
2224
                           node, instance.disks[i].iv_name)
2225
        continue
2226

    
2227
      cumul_degraded = (cumul_degraded or
2228
                        (mstat.is_degraded and mstat.sync_percent is None))
2229
      if mstat.sync_percent is not None:
2230
        done = False
2231
        if mstat.estimated_time is not None:
2232
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2233
          max_time = mstat.estimated_time
2234
        else:
2235
          rem_time = "no time estimate"
2236
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2237
                        (instance.disks[i].iv_name, mstat.sync_percent,
2238
                         rem_time))
2239

    
2240
    # if we're done but degraded, let's do a few small retries, to
2241
    # make sure we see a stable and not transient situation; therefore
2242
    # we force restart of the loop
2243
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2244
      logging.info("Degraded disks found, %d retries left", degr_retries)
2245
      degr_retries -= 1
2246
      time.sleep(1)
2247
      continue
2248

    
2249
    if done or oneshot:
2250
      break
2251

    
2252
    time.sleep(min(60, max_time))
2253

    
2254
  if done:
2255
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2256
  return not cumul_degraded
2257

    
2258

    
2259
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2260
  """Check that mirrors are not degraded.
2261

2262
  The ldisk parameter, if True, will change the test from the
2263
  is_degraded attribute (which represents overall non-ok status for
2264
  the device(s)) to the ldisk (representing the local storage status).
2265

2266
  """
2267
  lu.cfg.SetDiskID(dev, node)
2268

    
2269
  result = True
2270

    
2271
  if on_primary or dev.AssembleOnSecondary():
2272
    rstats = lu.rpc.call_blockdev_find(node, dev)
2273
    msg = rstats.fail_msg
2274
    if msg:
2275
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2276
      result = False
2277
    elif not rstats.payload:
2278
      lu.LogWarning("Can't find disk on node %s", node)
2279
      result = False
2280
    else:
2281
      if ldisk:
2282
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2283
      else:
2284
        result = result and not rstats.payload.is_degraded
2285

    
2286
  if dev.children:
2287
    for child in dev.children:
2288
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2289

    
2290
  return result
2291

    
2292

    
2293
class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants")
  # Fields that need calculation of global os validity
  _FIELDS_NEEDVALID = frozenset(["valid", "variants"])

  def ExpandNames(self):
    """Validate the arguments; no locks are acquired.

    @raise errors.OpPrereqError: if a list of names was passed, as
        selective queries are not supported

    """
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported",
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and RPC results as
        values; the payload of each result is a list of
        (name, path, status, diagnose, variants) entries

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and lists of (path, status, diagnose, variants)
        tuples as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [...]),
                                     (/srv/..., False, "invalid api", [])],
                           "node2": [(/srv/..., True, "", [...])]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose, variants in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose, variants))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    @rtype: list
    @return: one row per OS, each row holding the values of the
        requested output fields, in the requested order

    """
    valid_nodes = list(self.cfg.GetOnlineNodeList())
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(node_data)
    output = []
    calc_valid = self._FIELDS_NEEDVALID.intersection(self.op.output_fields)
    calc_variants = "variants" in self.op.output_fields

    for os_name, os_data in pol.items():
      row = []
      if calc_valid:
        # an OS is valid only if, on every node which answered, its
        # first entry has a true status; the variants are the ones
        # common to all nodes
        valid = True
        variants = None
        for osl in os_data.values():
          # coerce to a boolean: the bare and/or chain would otherwise
          # hand back an empty list for a node without this OS
          valid = bool(valid and osl and osl[0][1])
          if not valid:
            variants = None
            break
          if calc_variants:
            node_variants = osl[0][3]
            if variants is None:
              variants = node_variants
            else:
              variants = [v for v in variants if v in node_variants]

      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = valid
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        elif field == "variants":
          val = variants
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2406

    
2407

    
2408
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is taken out of both hook node lists: the
    pre phase must not run on it, as a failed node would then be
    impossible to remove.

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    hook_nodes = self.cfg.GetNodeList()
    if target in hook_nodes:
      hook_nodes.remove(target)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it's not the master
     - it does not have primary or secondary instances

    Any errors are signaled by raising errors.OpPrereqError. On success
    the expanded node name is stored in the opcode and the node object
    in C{self.node}.

    """
    full_name = self.cfg.ExpandNodeName(self.op.node_name)
    node = self.cfg.GetNodeInfo(full_name)
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name,
                                 errors.ECODE_NOENT)

    instance_list = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name not in instance.all_nodes:
        continue
      raise errors.OpPrereqError("Instance %s is still running on the node,"
                                 " please remove first." % instance_name,
                                 errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    cluster = self.cfg.GetClusterInfo()
    modify_ssh_setup = cluster.modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run the post hooks on the removed node; failures are only reported
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node_name])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    if leave_result.fail_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_result.fail_msg)
2492

    
2493

    
2494
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False

  # fields read directly as attributes of the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained"]

  # fields filled from the node_info RPC result
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  # fields computed from the configuration only
  _FIELDS_STATIC = utils.FieldSet(*[
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role"] + _SIMPLE_FIELDS
    )

  def ExpandNames(self):
    """Validate the requested fields and compute the needed locks.

    Node locks (shared) are only requested when non-static fields were
    selected and the opcode asks for locking.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(self, self.op.names)

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # A non-empty node list was already validated by _GetWantedNodes in
    # ExpandNames; an empty one needs no validation
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @return: one list per node (in NiceSort order of the node names),
        holding the values of C{self.op.output_fields} in order

    """
    node_cfg = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted == locking.ALL_SET:
      nodenames = node_cfg.keys()
    else:
      nodenames = self.wanted
      gone = set(nodenames).difference(node_cfg.keys())
      if gone:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % gone)

    nodenames = utils.NiceSort(nodenames)
    nodelist = [node_cfg[name] for name in nodenames]

    # begin data gathering

    if not self.do_node_query:
      live_data = dict.fromkeys(nodenames, {})
    else:
      live_data = {}
      rpc_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.cfg.GetHypervisorType())
      # output field -> key in the RPC payload, for integer values
      int_keys = [
        ("mtotal", 'memory_total'),
        ("mnode", 'memory_dom0'),
        ("mfree", 'memory_free'),
        ("dtotal", 'vg_size'),
        ("dfree", 'vg_free'),
        ("ctotal", 'cpu_total'),
        ("cnodes", 'cpu_nodes'),
        ("csockets", 'cpu_sockets'),
        ]
      for name in nodenames:
        nresult = rpc_data[name]
        stats = {}
        if not nresult.fail_msg and nresult.payload:
          payload = nresult.payload
          for (out_field, key) in int_keys:
            stats[out_field] = utils.TryConvert(int, payload.get(key, None))
          stats["bootid"] = payload.get('bootid', None)
        live_data[name] = stats

    primaries = dict([(name, set()) for name in nodenames])
    secondaries = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields.intersection(self.op.output_fields):
      for inst in self.cfg.GetAllInstancesInfo().values():
        pnode = inst.primary_node
        if pnode in primaries:
          primaries[pnode].add(inst.name)
        for snode in inst.secondary_nodes:
          if snode in secondaries:
            secondaries[snode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          val = getattr(node, field)
        elif field == "pinst_list":
          val = list(primaries[node.name])
        elif field == "sinst_list":
          val = list(secondaries[node.name])
        elif field == "pinst_cnt":
          val = len(primaries[node.name])
        elif field == "sinst_cnt":
          val = len(secondaries[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "master":
          val = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          # first matching state wins: master, candidate, drained,
          # offline, regular
          val = "R"
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output
2653

    
2654

    
2655
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    """Validate the requested fields and declare the node locks.

    Shared locks are requested on the given nodes, or on all nodes if
    no node was given.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The output fields were already validated in ExpandNames; this only
    records the nodes whose locks were acquired as the nodes to query.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the nodes and their attributes.

    Offline nodes are skipped silently, nodes whose RPC call failed are
    skipped with a warning.

    @return: one list per volume, holding the requested fields as
        strings, in the order of C{self.op.output_fields}
    @raise errors.ParameterError: if an unsupported field is requested

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    # (instance name, LVs by node) pairs, in instance list order
    lv_by_node = [(inst.name, inst.MapLVsByNode()) for inst in ilist]
    want_instance = "instance" in self.op.output_fields

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      # Map each LV on this node to its owning instance once per node,
      # instead of rescanning all instances for every volume. setdefault
      # keeps the first instance in list order, like a linear scan would.
      lv_owner = {}
      if want_instance:
        for (iname, lvmap) in lv_by_node:
          if node not in lvmap:
            continue
          for lv_name in lvmap[node]:
            lv_owner.setdefault(lv_name, iname)

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # '-' marks a volume not used by any known instance
            val = lv_owner.get(vol['name'], '-')
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output
2739

    
2740

    
2741
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)

  def ExpandNames(self):
    """Validate storage type and fields, then declare the node locks.

    Shared locks are requested on the given nodes, or on all nodes if
    no node was given.

    """
    if self.op.storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" %
                                 self.op.storage_type,
                                 errors.ECODE_INVAL)

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      wanted = locking.ALL_SET
    else:
      wanted = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    Defaults the optional "name" opcode attribute to None and records
    the nodes whose locks were acquired as the nodes to query.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes whose RPC call failed are
    skipped with a warning.

    @return: one list per storage unit, holding the values of
        C{self.op.output_fields} in order

    """
    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      fields = requested[:]
    else:
      fields = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = [constants.SF_NODE, constants.SF_TYPE]
    fields = [name for name in fields if name not in lu_only]

    field_idx = dict((name, idx) for (idx, name) in enumerate(fields))
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      if nresult.fail_msg:
        self.LogWarning("Can't get storage data from node %s: %s", node,
                        nresult.fail_msg)
        continue

      # index the rows by unit name so they can be emitted sorted
      by_name = dict((row[name_idx], row) for row in nresult.payload)

      for unit_name in utils.NiceSort(by_name.keys()):
        row = by_name[unit_name]
        out = []

        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
2836

    
2837

    
2838
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the storage type.

    """
    expanded = self.cfg.ExpandNodeName(self.op.node_name)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)

    self.op.node_name = expanded

    if self.op.storage_type not in constants.VALID_STORAGE_TYPES:
      raise errors.OpPrereqError("Unknown storage type: %s" %
                                 self.op.storage_type,
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the lock on the node owning the storage unit.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that the storage type supports modification at all and that
    only modifiable fields are being changed.

    """
    stype = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[stype]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype,
                                 errors.ECODE_INVAL)

    rejected = set(self.op.changes.keys()) - modifiable
    if not rejected:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (stype, list(rejected)),
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    rpc_result = self.rpc.call_storage_modify(op.node_name,
                                              op.storage_type, st_args,
                                              op.name, op.changes)
    rpc_result.Raise("Failed to modify storage unit '%s' on %s" %
                     (op.name, op.node_name))
2893

    
2894

    
2895
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list holds all current nodes, the second one all
    current nodes plus the new node.

    """
    op = self.op
    env = {
      "OP_TARGET": op.node_name,
      "NODE_NAME": op.node_name,
      "NODE_PIP": op.primary_ip,
      "NODE_SIP": op.secondary_ip,
      }
    nodes_before = self.cfg.GetNodeList()
    nodes_after = nodes_before + [op.node_name]
    return env, nodes_before, nodes_after

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is resolvable
     - it is not already in the config (or, for re-adds, that it is)
     - its IP addresses don't conflict with those of existing nodes
     - its parameters (single/dual homed) matches the cluster
     - it is reachable on the node daemon port

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg

    hostinfo = utils.GetHostInfo(self.op.node_name)

    node = hostinfo.name
    primary_ip = hostinfo.ip
    self.op.primary_ip = primary_ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      # no secondary IP given means a single-homed node
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given",
                                 errors.ECODE_INVAL)
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    already_known = node in node_list
    if self.op.readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" %
                                   node, errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    new_ips = (primary_ip, secondary_ip)
    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)
      existing_ips = (existing_node.primary_ip, existing_node.secondary_ip)

      if self.op.readd and node == existing_node_name:
        # the node being re-added must keep its addresses
        if existing_ips != new_ips:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        continue

      if existing_ips[0] in new_ips or existing_ips[1] in new_ips:
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no private ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a private ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      secondary_ok = utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                                   source=myself.secondary_ip)
      if not secondary_ok:
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port",
                                   errors.ECODE_ENVIRON)

    exceptions = []
    if self.op.readd:
      exceptions.append(node)

    self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)

    if not self.op.readd:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)
    else:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name
    readd = self.op.readd

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    version_res = self.rpc.call_version([node])[node]
    version_res.Raise("Can't get version information from node %s" % node)
    if not (constants.PROTOCOL_VERSION == version_res.payload):
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_res.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_res.payload)

    # setup ssh on node
    if self.cfg.GetClusterInfo().modify_ssh_setup:
      logging.info("Copy ssh key to node %s", node)
      priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
      keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                  constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                  priv_key, pub_key]

      # file contents, in the same order as keyfiles
      (dsa_priv, dsa_pub, rsa_priv, rsa_pub, user_priv, user_pub) = \
        [utils.ReadFile(path) for path in keyfiles]

      key_res = self.rpc.call_node_add(node, dsa_priv, dsa_pub,
                                       rsa_priv, rsa_pub, user_priv,
                                       user_pub)
      key_res.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      ip_res = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      ip_res.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True, ecode=errors.ECODE_ENVIRON)
      if not ip_res.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    verify_res = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                           self.cfg.GetClusterName())
    for verifier in node_verify_list:
      verify_res[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = verify_res[verifier].payload[constants.NV_NODELIST]
      if not nl_payload:
        continue
      for failed in nl_payload:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, nl_payload[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      demote_res = self.rpc.call_node_demote_from_mc(new_node.name)
      if demote_res.fail_msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % demote_res.fail_msg)
3113

    
3114

    
3115
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The opcode carries three tri-state flags (master_candidate, offline,
  drained): None means "leave unchanged", True/False set the flag.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and validate the requested flag changes.

    @raise errors.OpPrereqError: if the node is unknown, no modification
        was passed, or more than one flag is being set to True

    """
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      # an unknown node is reported as ECODE_NOENT, like the other node
      # LUs in this module do
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
                                 errors.ECODE_NOENT)
    self.op.node_name = node_name
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the lock on the node being modified.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node and on the node being modified.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that:
     - the flags of the master node are not being changed
     - demoting, offlining or draining a master candidate leaves enough
       master candidates (unless forced for an offline/drain operation)
     - an offline or drained node is not promoted to master candidate

    A node being de-offlined or un-drained is additionally checked for
    automatic promotion to master candidate.

    @raise errors.OpPrereqError: if any of the checks fails

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover",
                                   errors.ECODE_INVAL)

    # The flags are tri-state, hence the explicit comparisons against
    # True/False: None ("unchanged") must match neither
    offline_or_drain = self.op.offline == True or self.op.drained == True
    deoffline_or_drain = self.op.offline == False or self.op.drained == False

    if (node.master_candidate and
        (self.op.master_candidate == False or offline_or_drain)):
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      mc_now, mc_should, mc_max = self.cfg.GetMasterCandidateStats()
      if mc_now <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, mc_now-1))
        # Only allow forcing the operation if it's an offline/drain operation,
        # and we could not possibly promote more nodes.
        # FIXME: this can still lead to issues if in any way another node which
        # could be promoted appears in the meantime.
        if self.op.force and offline_or_drain and mc_should == mc_max:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name,
                                 errors.ECODE_INVAL)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (deoffline_or_drain and not offline_or_drain and not
        self.op.master_candidate == True and not node.master_candidate):
      self.op.master_candidate = _DecideSelfPromotion(self)
      if self.op.master_candidate:
        self.LogInfo("Autopromoting node to master candidate")

  def Exec(self, feedback_fn):
    """Modifies a node.

    @rtype: list of (parameter, description) tuples
    @return: the changes applied to the node, including automatic ones
        (e.g. demotion from master candidate when offlining)

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        # no demotion RPC is issued in this branch, unlike for draining
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result
3268

    
3269

    
3270
class LUPowercycleNode(NoHooksLU):
  """Logical unit that asks a node to power-cycle itself.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and guard the master node.

    The opcode's C{node_name} is replaced by its expanded form.

    @raise errors.OpPrereqError: if the name cannot be expanded, or if the
        node is the master node and C{force} was not given

    """
    requested = self.op.node_name
    expanded = self.cfg.ExpandNodeName(requested)
    if expanded is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % requested,
                                 errors.ECODE_NOENT)
    self.op.node_name = expanded

    targets_master = (expanded == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declare the (empty) lock set for this LU.

    Power-cycling is a last-resort operation which must not wait for
    other jobs, hence no locks are requested at all.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this LU.

    """

  def Exec(self, feedback_fn):
    """Schedule the reboot of the node.

    @return: the payload of the powercycle RPC call
    @raise errors.OpExecError: presumably raised by C{Raise} when the RPC
        call failed (the RPC result class is not visible here)

    """
    hv_name = self.cfg.GetHypervisorType()
    outcome = self.rpc.call_node_powercycle(self.op.node_name, hv_name)
    outcome.Raise("Failed to schedule the reboot")
    return outcome.payload
3313

    
3314

    
3315
class LUQueryClusterInfo(NoHooksLU):
  """Logical unit returning a summary of the cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    """This query needs no locks."""
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    Nothing has to be verified for this LU.

    """

  def Exec(self, feedback_fn):
    """Collect version constants and cluster-level settings.

    @rtype: dict
    @return: software/protocol/config versions, the architecture of the
        machine running this code, and the cluster object's name, master,
        hypervisor, backend, NIC and storage settings, timestamps, uuid
        and tags

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # only the parameters of enabled hypervisors are reported
    enabled_hvparams = dict((hv_name, cluster.hvparams[hv_name])
                            for hv_name in enabled_hvs)
    arch_info = (platform.architecture()[0], platform.machine())

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": arch_info,
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      # the first enabled hypervisor is reported as the default one
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": enabled_hvparams,
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      }
3362

    
3363

    
3364
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  The queryable fields are the ones listed in C{_FIELDS_STATIC}; there
  are no dynamic fields.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    """Request no locks and validate the selected output fields."""
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the value of each requested field.

    @rtype: list
    @return: one entry per name in C{self.op.output_fields}, in the same
        order as requested
    @raise errors.ParameterError: if an unknown field name is encountered

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        # the queue counts as drained when the drain marker file exists
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
      elif field == "watcher_pause":
        # collected like any other field: returning directly from here
        # would drop the values gathered so far, skip the remaining fields
        # and hand back a bare value instead of a list
        entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
      else:
        raise errors.ParameterError(field)
      values.append(entry)
    return values
3405

    
3406

    
3407
class LUActivateInstanceDisks(NoHooksLU):
  """Logical unit assembling the block devices of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks."""
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node-level locks once that level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the (already locked) instance, runs the online check on its
    primary node and defaults the optional C{ignore_size} opcode field
    to False.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)
    # ignore_size is optional in the opcode
    self.op.ignore_size = getattr(self.op, "ignore_size", False)

  def Exec(self, feedback_fn):
    """Assemble the disks and return their device information.

    @return: the device list computed by L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the assembly was not fully successful

    """
    assembled, dev_info = _AssembleInstanceDisks(
      self, self.instance, ignore_size=self.op.ignore_size)
    if assembled:
      return dev_info

    raise errors.OpExecError("Cannot activate block devices")
3447

    
3448

    
3449
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first
      (secondary-mode) pass won't result in an error return from the
      function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a C{(disks_ok, device_info)} pair; C{disks_ok} is False if
      any non-ignored assemble call failed, and C{device_info} is a list
      with one (primary_node, instance_visible_name, node_visible_name)
      entry per instance disk, where node_visible_name is None if the
      disk could not be assembled on the primary node

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  pnode = instance.primary_node

  def _PrepareDisk(node_disk, node):
    """Return the disk object to pass to the assemble RPC for a node."""
    if ignore_size:
      # work on a copy so the configured disk object keeps its size
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
    lu.cfg.SetDiskID(node_disk, node)
    return node_disk

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in instance.disks:
    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      node_disk = _PrepareDisk(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in instance.disks:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      if node != pnode:
        continue
      node_disk = _PrepareDisk(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        # failures on the primary node are never ignored
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((pnode, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in instance.disks:
    lu.cfg.SetDiskID(disk, pnode)

  return disks_ok, device_info
3531

    
3532

    
3533
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, cleaning up on failure.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be started
  @param force: passed as C{ignore_secondaries} to the disk assembly; when
      it is a false value other than None, a hint about '--force' is
      logged on failure
  @raise errors.OpExecError: if the disks could not be assembled; the
      disks are shut down again before raising

  """
  assembled, _ = _AssembleInstanceDisks(lu, instance,
                                        ignore_secondaries=force)
  if assembled:
    return

  # roll back whatever was brought up before reporting the error
  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
3546

    
3547

    
3548
class LUDeactivateInstanceDisks(NoHooksLU):
  """Logical unit shutting down the block devices of an instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in DeclareLocks."""
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node-level locks once that level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the (already locked) instance in the configuration.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks, unless the instance is still running.

    """
    _SafeShutdownInstanceDisks(self, self.instance)
3580

    
3581

    
3582
def _SafeShutdownInstanceDisks(lu, instance):
  """Shutdown block devices of an instance, if it is not running.

  The primary node is asked for its list of running instances first; the
  actual work is then delegated to L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @raise errors.OpExecError: if the instance is listed as running on its
      primary node

  """
  primary = instance.primary_node
  listing = lu.rpc.call_instance_list([primary], [instance.hypervisor])
  node_result = listing[primary]
  node_result.Raise("Can't contact node %s" % primary)

  running_instances = node_result.payload
  if instance.name in running_instances:
    raise errors.OpExecError("Instance is running, can't shutdown"
                             " block devices.")

  _ShutdownInstanceDisks(lu, instance)
3598

    
3599

    
3600
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are still
  logged but do not affect the return value; errors on any other node
  always do.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks should be shut down
  @type ignore_primary: boolean
  @param ignore_primary: whether to ignore shutdown failures on the
      primary node
  @rtype: boolean
  @return: True if no (non-ignored) shutdown call failed, False otherwise

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        # only a primary-node failure with ignore_primary set is forgiven
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
3621

    
3622

    
3623
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has enough free memory.

  The node is queried for its current status; the check fails if the
  query fails, if the reported free memory is not an integer, or if it
  is smaller than the requested amount.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  node_result = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get('memory_free', None)
  if not isinstance(free_mem, int):
    # covers both a missing value and a non-numeric answer
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)

  if requested > free_mem:
    details = (node, reason, requested, free_mem)
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" % details,
                               errors.ECODE_NORES)
3658

    
3659

    
3660
class LUStartupInstance(LogicalUnit):
  """Logical unit starting an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance."""
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list consists of the master node followed by all nodes of
    the instance; it is returned twice next to the environment.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Validates the optional one-off C{beparams}/C{hvparams} overrides, runs
    the online check on the primary node and the bridge check, and, if
    the instance is not reported as running, verifies that the primary
    node has enough free memory.

    @raise errors.OpPrereqError: if the overrides are not dictionaries
        (further errors may come from the helper checks)

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    be_override = getattr(self.op, "beparams", {})
    self.beparams = be_override
    if be_override:
      if not isinstance(be_override, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(be_override), ),
                                   errors.ECODE_INVAL)
      # fill the beparams dict
      utils.ForceDictType(be_override, constants.BES_PARAMETER_TYPES)
      self.op.beparams = be_override

    # extra hvparams
    hv_override = getattr(self.op, "hvparams", {})
    self.hvparams = hv_override
    if hv_override:
      if not isinstance(hv_override, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(hv_override), ),
                                   errors.ECODE_INVAL)

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hv_override, constants.HVS_PARAMETER_TYPES)
      hv_name = instance.hypervisor
      # cluster defaults, then instance settings, then one-off overrides
      effective_hvp = objects.FillDict(cluster.hvparams[hv_name],
                                       instance.hvparams)
      effective_hvp.update(hv_override)
      hypervisor.GetHypervisor(hv_name).CheckParameterSyntax(effective_hvp)
      _CheckHVParams(self, instance.all_nodes, hv_name, effective_hvp)
      self.op.hvparams = hv_override

    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    filled_be = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    run_state = self.rpc.call_instance_info(pnode, instance.name,
                                            instance.hypervisor)
    run_state.Raise("Error checking node %s" % pnode,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
    if run_state.payload:
      # already running, no memory check needed
      return
    _CheckNodeFreeMemory(self, pnode,
                         "starting instance %s" % instance.name,
                         filled_be[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first; then its
    disks are started and the start RPC is sent to the primary node.

    @raise errors.OpExecError: if the instance could not be started (the
        disks are shut down again in that case)

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    pnode = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    start_result = self.rpc.call_instance_start(pnode, instance,
                                                self.hvparams, self.beparams)
    failure = start_result.fail_msg
    if failure:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % failure)
3760

    
3761

    
3762
class LURebootInstance(LogicalUnit):
  """Logical unit rebooting an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The optional C{shutdown_timeout} opcode field falls back to the
    default shutdown timeout.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Validate the reboot type, then expand and lock the instance.

    @raise errors.ParameterError: if the reboot type is not one of the
        soft, hard or full reboot constants

    """
    known_types = [constants.INSTANCE_REBOOT_SOFT,
                   constants.INSTANCE_REBOOT_HARD,
                   constants.INSTANCE_REBOOT_FULL]
    if self.op.reboot_type not in known_types:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  tuple(known_types))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list consists of the master node followed by all nodes of
    the instance; it is returned twice next to the environment.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the (already locked) instance, then runs the online check on
    its primary node and the bridge check.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    Soft and hard reboots are delegated to the primary node in a single
    RPC call. Any other (i.e. full) reboot is done as an instance
    shutdown, a disk shutdown/start cycle and a fresh instance start.
    In both cases the instance is marked as up afterwards.

    @raise errors.OpExecError: if the instance cannot be started again
        during a full reboot

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    pnode = instance.primary_node

    in_place_types = (constants.INSTANCE_REBOOT_SOFT,
                      constants.INSTANCE_REBOOT_HARD)
    if reboot_type in in_place_types:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, instance,
                                                    reboot_type,
                                                    self.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")
    else:
      stop_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                    self.shutdown_timeout)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      start_result = self.rpc.call_instance_start(pnode, instance, None, None)
      failure = start_result.fail_msg
      if failure:
        # do not leave the disks active for an instance that is not running
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % failure)

    self.cfg.MarkInstanceUp(instance.name)
3850

    
3851

    
3852
class LUShutdownInstance(LogicalUnit):
  """Logical unit shutting down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The optional C{timeout} opcode field falls back to the default
    shutdown timeout.

    """
    self.timeout = getattr(self.op, "timeout",
                           constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Expand the instance name and lock the instance."""
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list consists of the master node followed by all nodes of
    the instance; it is returned twice next to the environment.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["TIMEOUT"] = self.timeout
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the (already locked) instance and runs the online check on
    its primary node.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration before the
    shutdown RPC is sent. A failed shutdown call is only logged as a
    warning; the disks are shut down in any case.

    """
    instance = self.instance
    pnode = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)

    stop_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                  self.timeout)
    failure = stop_result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, instance)
3907

    
3908

    
3909
class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance."""
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    If a new OS type was requested, it is also looked up on the primary
    node.

    @raise errors.OpPrereqError: if the instance is diskless, is marked
        up or is actually running, or if the primary node is unknown
        (further errors may come from the helper checks)

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name,
                                 errors.ECODE_STATE)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node),
                                 errors.ECODE_STATE)

    # both fields are optional in the opcode
    self.op.os_type = getattr(self.op, "os_type", None)
    self.op.force_variant = getattr(self.op, "force_variant", False)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        # report the instance's primary node: this opcode has no "pnode"
        # field, so reading self.op.pnode here would fail with an
        # AttributeError instead of the intended error
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node, errors.ECODE_NOENT)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name),
                   prereq=True, ecode=errors.ECODE_INVAL)
      if not self.op.force_variant:
        _CheckOSVariant(result.payload, self.op.os_type)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS type was given it is stored in the configuration first.
    The disks are then started, the OS add RPC is run on the primary
    node and the disks are shut down again, even if the call failed.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True, 0)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
3999

    
4000

    
4001
class LURecreateInstanceDisks(LogicalUnit):
  """Logical unit recreating the disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    @raise errors.OpPrereqError: if C{disks} is not a list, or contains
        anything other than non-negative integers

    """
    requested = self.op.disks
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
    for item in requested:
      if isinstance(item, int) and item >= 0:
        continue
      raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                 str(item), errors.ECODE_INVAL)

  def ExpandNames(self):
    """Expand the instance name and lock the instance."""
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list consists of the master node followed by all nodes of
    the instance; it is returned twice next to the environment.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must have disks, must not be marked up and must not be
    running on its primary node. An empty C{disks} list selects all disks
    of the instance; otherwise every index must refer to an existing disk.

    @raise errors.OpPrereqError: if any of the above does not hold

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name, errors.ECODE_STATE)
    run_state = self.rpc.call_instance_info(pnode, instance.name,
                                            instance.hypervisor)
    run_state.Raise("Error checking node %s" % pnode,
                    prereq=True, ecode=errors.ECODE_ENVIRON)
    if run_state.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name, pnode),
                                 errors.ECODE_STATE)

    disk_count = len(instance.disks)
    if self.op.disks:
      for idx in self.op.disks:
        if idx >= disk_count:
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
                                     errors.ECODE_INVAL)
    else:
      # no explicit selection means all disks
      self.op.disks = range(disk_count)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk index which was not selected is passed to L{_CreateDisks}
    as one to skip.

    """
    to_skip = [idx for idx, _ in enumerate(self.instance.disks)
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
4083

    
4084

    
4085
class LURenameInstance(LogicalUnit):
  """Logical unit giving an existing instance a new name.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The instance environment is extended with C{INSTANCE_NEW_NAME}; the
    node list (returned twice) holds the master node followed by all
    nodes of the instance.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["INSTANCE_NEW_NAME"] = self.op.new_name
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.all_nodes)
    return hooks_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is known, that its primary node is
    online and that the instance is neither marked up nor running there.
    The new name is resolved and must not belong to an existing instance;
    unless the opcode carries a true C{ignore_ip}, the resolved IP must
    also not answer a TCP ping.

    """
    op = self.op
    expanded_name = self.cfg.ExpandInstanceName(op.instance_name)
    instance = self.cfg.GetInstanceInfo(expanded_name)
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 op.instance_name, errors.ECODE_NOENT)
    pnode = instance.primary_node
    _CheckNodeOnline(self, pnode)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 op.instance_name, errors.ECODE_STATE)
    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (op.instance_name, pnode),
                                 errors.ECODE_STATE)
    self.instance = instance

    # resolve the new name and store the resolved form back in the opcode
    name_info = utils.GetHostInfo(op.new_name)
    new_name = name_info.name
    op.new_name = new_name

    if new_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name, errors.ECODE_EXISTS)

    if getattr(op, "ignore_ip", False):
      # the caller asked for the IP check to be skipped
      return
    if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                 (name_info.ip, new_name),
                                 errors.ECODE_NOTUNIQUE)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are switched to the new name
    first; for file-based instances the storage directory is renamed
    afterwards, and finally the OS rename script is run with the
    instance's disks activated.  A failure of the rename script is only
    logged as a warning.

    """
    instance = self.instance
    old_name = instance.name
    new_name = self.op.new_name

    if instance.disk_template == constants.DT_FILE:
      # remember the directory before the configuration is changed
      old_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, new_name)
    # Swap the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, old_name)
    lock_manager.add(locking.LEVEL_INSTANCE, new_name)

    # the renamed instance has to be read again from the configuration
    instance = self.cfg.GetInstanceInfo(new_name)

    if instance.disk_template == constants.DT_FILE:
      new_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
      rename_result = self.rpc.call_file_storage_dir_rename(
        instance.primary_node, old_storage_dir, new_storage_dir)
      rename_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                          " (but the instance has been renamed in Ganeti)" %
                          (instance.primary_node, old_storage_dir,
                           new_storage_dir))

    _StartInstanceDisks(self, instance, None)
    try:
      script_result = self.rpc.call_instance_run_rename(instance.primary_node,
                                                        instance, old_name, 0)
      fail_msg = script_result.fail_msg
      if fail_msg:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (instance.name, instance.primary_node, fail_msg))
        self.proc.LogWarning(warning)
    finally:
      # the disks were only activated for the rename script
      _ShutdownInstanceDisks(self, instance)
4187

    
4188

    
4189
class LURemoveInstance(LogicalUnit):
  """Logical unit removing an instance from the cluster.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is taken from the opcode when present and
    defaults to C{constants.DEFAULT_SHUTDOWN_TIMEOUT} otherwise.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The instance environment is extended with C{SHUTDOWN_TIMEOUT}; the
    node list (returned twice) holds only the master node.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
    node_list = [self.cfg.GetMasterNode()]
    return hooks_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the (already locked) instance is in the cluster.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down, its disks are removed and it is dropped
    from the configuration.  With C{ignore_failures} set, shutdown and
    disk removal problems are reported via C{feedback_fn} instead of
    aborting the operation.

    """
    instance = self.instance
    pnode = instance.primary_node
    logging.info("Shutting down instance %s on node %s",
                 instance.name, pnode)

    shutdown_result = self.rpc.call_instance_shutdown(pnode, instance,
                                                      self.shutdown_timeout)
    fail_msg = shutdown_result.fail_msg
    if fail_msg:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node,
                                  fail_msg))
      feedback_fn("Warning: can't shutdown instance: %s" % fail_msg)

    logging.info("Removing block devices for instance %s", instance.name)

    disks_removed = _RemoveDisks(self, instance)
    if not disks_removed:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Can't remove instance's disks")
      feedback_fn("Warning: can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    # the lock of the now unknown instance has to go away as well
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4266

    
4267

    
4268
class LUQueryInstances(NoHooksLU):
  """Logical unit answering queries about instances.

  """
  # pylint: disable-msg=W0142
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  # fields read directly as attributes of the instance object
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  # fields answered from the live data collected from the nodes
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")

  def ExpandNames(self):
    """Validate the selected fields and declare the needed locks.

    Locks are only declared when C{_FIELDS_STATIC.NonMatching} reports
    something for the selected fields and the opcode asks for locking.

    """
    selected = self.op.output_fields
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=selected)

    self.needed_locks = {}
    for level in (locking.LEVEL_INSTANCE, locking.LEVEL_NODE):
      self.share_locks[level] = 1

    if not self.op.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedInstances(self, self.op.names)

    self.do_node_query = self._FIELDS_STATIC.NonMatching(selected)
    self.do_locking = self.do_node_query and self.op.use_locking
    if not self.do_locking:
      return
    self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks, if locking is in use at all.

    """
    if self.do_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to verify for this query.

    """
    pass

  def Exec(self, feedback_fn):
    """Compute the selected fields for the wanted instances.

    @rtype: list
    @return: one list per instance, holding the values of
        C{self.op.output_fields} in the order of that list

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.do_locking:
      known_names = self.acquired_locks[locking.LEVEL_INSTANCE]
    else:
      known_names = all_info.keys()

    if self.wanted == locking.ALL_SET:
      # no names were given by the caller, so ordering is not important
      instance_names = utils.NiceSort(known_names)
    else:
      # names were given by the caller, so their ordering must be kept
      missing = set(self.wanted).difference(known_names)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if not self.do_node_query:
      live_data = dict([(name, {}) for name in instance_names])
    else:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for node_name in nodes:
        node_result = node_data[node_name]
        if node_result.offline:
          # offline nodes will be in both lists
          off_nodes.append(node_name)
        if node_result.fail_msg:
          bad_nodes.append(node_name)
        elif node_result.payload:
          live_data.update(node_result.payload)
        # an empty payload means no instance is alive on that node

    # end data gathering

    hv_prefix = "hv/"
    be_prefix = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      row = []
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      # per-NIC parameters, filled up with the cluster defaults
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      pnode = instance.primary_node
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          val = None
          if pnode not in bad_nodes:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if pnode in off_nodes:
            val = "ERROR_nodeoffline"
          elif pnode in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running and instance.admin_up:
              val = "running"
            elif running:
              val = "ERROR_up"
            elif instance.admin_up:
              val = "ERROR_down"
            else:
              val = "ADMIN_down"
        elif field == "oper_ram":
          if pnode in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          val = None
          if instance.nics:
            val = instance.nics[0].ip
        elif field == "nic_mode":
          val = None
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
        elif field == "nic_link":
          val = None
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
        elif field == "bridge":
          val = None
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
        elif field == "mac":
          val = None
          if instance.nics:
            val = instance.nics[0].mac
        elif field in ("sda_size", "sdb_size"):
          # the third letter of the field name selects the disk (a=0, b=1)
          disk_idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(disk_idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(hv_prefix) and
              field[len(hv_prefix):] in constants.HVS_PARAMETERS and
              field[len(hv_prefix):] not in constants.HVC_GLOBALS):
          val = i_hv.get(field[len(hv_prefix):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(be_prefix) and
              field[len(be_prefix):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(be_prefix):], None)
        elif st_match and st_match.groups():
          # the field matched one of the patterns with groups
          st_groups = st_match.groups()
          kind = st_groups[0]
          item = st_groups[1]
          if kind == "disk":
            if item == "count":
              val = len(instance.disks)
            elif item == "sizes":
              val = [disk.size for disk in instance.disks]
            elif item == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif kind == "nic":
            if item == "count":
              val = len(instance.nics)
            elif item == "macs":
              val = [nic.mac for nic in instance.nics]
            elif item == "ips":
              val = [nic.ip for nic in instance.nics]
            elif item == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif item == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif item == "bridges":
              # only bridged NICs report their link, the others give None
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              elif item == "mac":
                val = instance.nics[nic_idx].mac
              elif item == "ip":
                val = instance.nics[nic_idx].ip
              elif item == "mode":
                val = i_nicp[nic_idx][constants.NIC_MODE]
              elif item == "link":
                val = i_nicp[nic_idx][constants.NIC_LINK]
              elif item == "bridge":
                nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                if nic_mode == constants.NIC_MODE_BRIDGED:
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                else:
                  val = None
              else:
                assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        row.append(val)
      output.append(row)

    return output
4549

    
4550

    
4551
class LUFailoverInstance(LogicalUnit):
  """Logical unit failing an instance over to its secondary node.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is taken from the opcode when present and
    defaults to C{constants.DEFAULT_SHUTDOWN_TIMEOUT} otherwise.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance; its node locks are computed in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    C{IGNORE_CONSISTENCY} and C{SHUTDOWN_TIMEOUT} are merged with the
    instance environment; the node list (returned twice) holds the
    master node followed by the secondary nodes of the instance.

    """
    hooks_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    hooks_env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(self.instance.secondary_nodes)
    return hooks_env, node_list, node_list

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its disk
    template is network mirrored and that the first secondary node is
    online, not drained and has the bridges needed by the instance.  The
    free memory of that node is only checked for instances marked up.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    be_params = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = instance.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondaries[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if not instance.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started, so the secondary needs the memory
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, be_params[constants.BE_MEMORY],
                           instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.  The instance is only started again
    when it is marked up.

    """
    instance = self.instance
    ignore_consistency = self.op.ignore_consistency

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    if not instance.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for dev in instance.disks:
        # for drbd, these are drbd over lvm
        consistent = _CheckDiskConsistency(self, dev, target_node, False)
        if not consistent and not ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    shutdown_result = self.rpc.call_instance_shutdown(source_node, instance,
                                                      self.shutdown_timeout)
    fail_msg = shutdown_result.fail_msg
    if fail_msg:
      if not ignore_consistency:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, fail_msg))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           instance.name, source_node, source_node, fail_msg)

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance, feedback_fn)

    if not instance.admin_up:
      # instances which are marked down are not started on the new node
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 instance.name, target_node)

    disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                         ignore_secondaries=True)
    if not disks_ok:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_result = self.rpc.call_instance_start(target_node, instance,
                                                None, None)
    fail_msg = start_result.fail_msg
    if fail_msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (instance.name, target_node, fail_msg))
4694

    
4695

    
4696
class LUMigrateInstance(LogicalUnit):
  """Logical unit migrating an instance.

  Unlike the failover, which is done with a shutdown, the migration
  keeps the instance running.  The work itself is delegated to a
  L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and set up the migration tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 self.op.live, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment.

    The instance is taken from the tasklet; C{MIGRATE_LIVE} and
    C{MIGRATE_CLEANUP} are added to its environment.  The node list
    (returned twice) holds the master node followed by the secondary
    nodes of the instance.

    """
    instance = self._migrater.instance
    hooks_env = _BuildInstanceHookEnvByObject(self, instance)
    hooks_env["MIGRATE_LIVE"] = self.op.live
    hooks_env["MIGRATE_CLEANUP"] = self.op.cleanup
    node_list = [self.cfg.GetMasterNode()]
    node_list.extend(instance.secondary_nodes)
    return hooks_env, node_list, node_list
4735

    
4736

    
4737
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The shutdown timeout is taken from the opcode when present and
    defaults to C{constants.DEFAULT_SHUTDOWN_TIMEOUT} otherwise.

    """
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    constants.DEFAULT_SHUTDOWN_TIMEOUT)

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    @raise errors.OpPrereqError: if the target node is not known

    """
    self._ExpandAndLockInstance()
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
    if target_node is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                  self.op.target_node, errors.ECODE_NOENT)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    C{TARGET_NODE} and C{SHUTDOWN_TIMEOUT} are merged with the instance
    environment; the node list (returned twice) holds the master node,
    the primary node of the instance and the target node.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that it does not
    already live on the target node and that all its disks are plain
    logical volumes or files.  The target node must be online, not
    drained and have the bridges needed by the instance; its free memory
    is only checked for instances marked up.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the shutdown fails (and consistency is
        not ignored), if the disks cannot be created or copied, or if
        the instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # "ignore_consistency" is not part of _OP_REQP, hence the opcode may
      # come without it; a failed shutdown is fatal in that case
      if getattr(self.op, "ignore_consistency", False):
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; stop at the first failure
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the data is on the target node now, so switch the configuration
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
4920

    
4921

    
4922
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance
  returned by L{_GetNodePrimaryInstances} for the node, and the list of
  tasklets is stored in C{self.tasklets}.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node name, declare the locks and build the tasklets.

    @raise errors.OpPrereqError: if the node name cannot be expanded

    """
    # Remember the name as it was given: a failed expansion yields None,
    # and the error message has to show the name that was looked up
    requested_name = self.op.node_name
    self.op.node_name = self.cfg.ExpandNodeName(requested_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % requested_name,
                                 errors.ECODE_NOENT)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      # the last argument is the tasklet's "cleanup" flag
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    @param level: the locking level being declared; only
        C{locking.LEVEL_NODE} triggers any work here

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment only carries the node name, and the only node in
    the returned node list is the master node.

    @return: tuple of (environment dict, node list, node list), with the
        same single-element node list used twice

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
4975

    
4976

    
4977
class TLMigrateInstance(Tasklet):
4978
  def __init__(self, lu, instance_name, live, cleanup):
4979
    """Initializes this class.
4980

4981
    """
4982
    Tasklet.__init__(self, lu)
4983

    
4984
    # Parameters
4985
    self.instance_name = instance_name
4986
    self.live = live
4987
    self.cleanup = cleanup
4988

    
4989
  def CheckPrereq(self):
4990
    """Check prerequisites.
4991

4992
    This checks that the instance is in the cluster.
4993

4994
    """
4995
    instance = self.cfg.GetInstanceInfo(
4996
      self.cfg.ExpandInstanceName(self.instance_name))
4997
    if instance is None:
4998
      raise errors.OpPrereqError("Instance '%s' not known" %
4999
                                 self.instance_name, errors.ECODE_NOENT)
5000

    
5001
    if instance.disk_template != constants.DT_DRBD8:
5002
      raise errors.OpPrereqError("Instance's disk layout is not"
5003
                                 " drbd8, cannot migrate.", errors.ECODE_STATE)
5004

    
5005
    secondary_nodes = instance.secondary_nodes
5006
    if not secondary_nodes:
5007
      raise errors.ConfigurationError("No secondary node but using"
5008
                                      " drbd8 disk template")
5009

    
5010
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
5011

    
5012
    target_node = secondary_nodes[0]
5013
    # check memory requirements on the secondary node
5014
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
5015
                         instance.name, i_be[constants.BE_MEMORY],
5016
                         instance.hypervisor)
5017

    
5018
    # check bridge existance
5019
    _CheckInstanceBridgesExist(self, instance, node=target_node)
5020

    
5021
    if not self.cleanup:
5022
      _CheckNodeNotDrained(self, target_node)
5023
      result = self.rpc.call_instance_migratable(instance.primary_node,
5024
                                                 instance)
5025
      result.Raise("Can't migrate, please use failover",
5026
                   prereq=True, ecode=errors.ECODE_STATE)
5027

    
5028
    self.instance = instance
5029

    
5030
  def _WaitUntilSync(self):
5031
    """Poll with custom rpc for disk sync.
5032

5033
    This uses our own step-based rpc call.
5034

5035
    """
5036
    self.feedback_fn("* wait until resync is done")
5037
    all_done = False
5038
    while not all_done:
5039
      all_done = True
5040
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5041
                                            self.nodes_ip,
5042
                                            self.instance.disks)
5043
      min_percent = 100
5044
      for node, nres in result.items():
5045
        nres.Raise("Cannot resync disks on node %s" % node)
5046
        node_done, node_percent = nres.payload
5047
        all_done = all_done and node_done
5048
        if node_percent is not None:
5049
          min_percent = min(min_percent, node_percent)
5050
      if not all_done:
5051
        if min_percent < 100:
5052
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
5053
        time.sleep(2)
5054

    
5055
  def _EnsureSecondary(self, node):
5056
    """Demote a node to secondary.
5057

5058
    """
5059
    self.feedback_fn("* switching node %s to secondary mode" % node)
5060

    
5061
    for dev in self.instance.disks:
5062
      self.cfg.SetDiskID(dev, node)
5063

    
5064
    result = self.rpc.call_blockdev_close(node, self.instance.name,
5065
                                          self.instance.disks)
5066
    result.Raise("Cannot change disk to secondary on node %s" % node)
5067

    
5068
  def _GoStandalone(self):
5069
    """Disconnect from the network.
5070

5071
    """
5072
    self.feedback_fn("* changing into standalone mode")
5073
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5074
                                               self.instance.disks)
5075
    for node, nres in result.items():
5076
      nres.Raise("Cannot disconnect disks node %s" % node)
5077

    
5078
  def _GoReconnect(self, multimaster):
5079
    """Reconnect to the network.
5080

5081
    """
5082
    if multimaster:
5083
      msg = "dual-master"
5084
    else:
5085
      msg = "single-master"
5086
    self.feedback_fn("* changing disks into %s mode" % msg)
5087
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5088
                                           self.instance.disks,
5089
                                           self.instance.name, multimaster)
5090
    for node, nres in result.items():
5091
      nres.Raise("Cannot change disks config on node %s" % node)
5092

    
5093
  def _ExecCleanup(self):
5094
    """Try to cleanup after a failed migration.
5095

5096
    The cleanup is done by:
5097
      - check that the instance is running only on one node
5098
        (and update the config if needed)
5099
      - change disks on its secondary node to secondary
5100
      - wait until disks are fully synchronized
5101
      - disconnect from the network
5102
      - change disks into single-master mode
5103
      - wait again until disks are fully synchronized
5104

5105
    """
5106
    instance = self.instance
5107
    target_node = self.target_node
5108
    source_node = self.source_node
5109

    
5110
    # check running on only one node
5111
    self.feedback_fn("* checking where the instance actually runs"
5112
                     " (if this hangs, the hypervisor might be in"
5113
                     " a bad state)")
5114
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5115
    for node, result in ins_l.items():
5116
      result.Raise("Can't contact node %s" % node)
5117

    
5118
    runningon_source = instance.name in ins_l[source_node].payload
5119
    runningon_target = instance.name in ins_l[target_node].payload
5120

    
5121
    if runningon_source and runningon_target:
5122
      raise errors.OpExecError("Instance seems to be running on two nodes,"
5123
                               " or the hypervisor is confused. You will have"
5124
                               " to ensure manually that it runs only on one"
5125
                               " and restart this operation.")
5126

    
5127
    if not (runningon_source or runningon_target):
5128
      raise errors.OpExecError("Instance does not seem to be running at all."
5129
                               " In this case, it's safer to repair by"
5130
                               " running 'gnt-instance stop' to ensure disk"
5131
                               " shutdown, and then restarting it.")
5132

    
5133
    if runningon_target:
5134
      # the migration has actually succeeded, we need to update the config
5135
      self.feedback_fn("* instance running on secondary node (%s),"
5136
                       " updating config" % target_node)
5137
      instance.primary_node = target_node
5138
      self.cfg.Update(instance, self.feedback_fn)
5139
      demoted_node = source_node
5140
    else:
5141
      self.feedback_fn("* instance confirmed to be running on its"
5142
                       " primary node (%s)" % source_node)
5143
      demoted_node = target_node
5144

    
5145
    self._EnsureSecondary(demoted_node)
5146
    try:
5147
      self._WaitUntilSync()
5148
    except errors.OpExecError:
5149
      # we ignore here errors, since if the device is standalone, it
5150
      # won't be able to sync
5151
      pass
5152
    self._GoStandalone()
5153
    self._GoReconnect(False)
5154
    self._WaitUntilSync()
5155

    
5156
    self.feedback_fn("* done")
5157

    
5158
  def _RevertDiskStatus(self):
5159
    """Try to revert the disk status after a failed migration.
5160

5161
    """
5162
    target_node = self.target_node
5163
    try:
5164
      self._EnsureSecondary(target_node)
5165
      self._GoStandalone()
5166
      self._GoReconnect(False)
5167
      self._WaitUntilSync()
5168
    except errors.OpExecError, err:
5169
      self.lu.LogWarning("Migration failed and I can't reconnect the"
5170
                         " drives: error '%s'\n"
5171
                         "Please look and recover the instance status" %
5172
                         str(err))
5173

    
5174
  def _AbortMigration(self):
5175
    """Call the hypervisor code to abort a started migration.
5176

5177
    """
5178
    instance = self.instance
5179
    target_node = self.target_node
5180
    migration_info = self.migration_info
5181

    
5182
    abort_result = self.rpc.call_finalize_migration(target_node,
5183
                                                    instance,
5184
                                                    migration_info,
5185
                                                    False)
5186
    abort_msg = abort_result.fail_msg
5187
    if abort_msg:
5188
      logging.error("Aborting migration failed on target node %s: %s",
5189
                    target_node, abort_msg)
5190
      # Don't raise an exception here, as we stil have to try to revert the
5191
      # disk status, even if this step failed.
5192

    
5193
  def _ExecMigration(self):
5194
    """Migrate an instance.
5195

5196
    The migrate is done by:
5197
      - change the disks into dual-master mode
5198
      - wait until disks are fully synchronized again
5199
      - migrate the instance
5200
      - change disks on the new secondary node (the old primary) to secondary
5201
      - wait until disks are fully synchronized
5202
      - change disks into single-master mode
5203

5204
    """
5205
    instance = self.instance
5206
    target_node = self.target_node
5207
    source_node = self.source_node
5208

    
5209
    self.feedback_fn("* checking disk consistency between source and target")
5210
    for dev in instance.disks:
5211
      if not _CheckDiskConsistency(self, dev, target_node, False):
5212
        raise errors.OpExecError("Disk %s is degraded or not fully"
5213
                                 " synchronized on target node,"
5214
                                 " aborting migrate." % dev.iv_name)
5215

    
5216
    # First get the migration information from the remote node
5217
    result = self.rpc.call_migration_info(source_node, instance)
5218
    msg = result.fail_msg
5219
    if msg:
5220
      log_err = ("Failed fetching source migration information from %s: %s" %
5221
                 (source_node, msg))
5222
      logging.error(log_err)
5223
      raise errors.OpExecError(log_err)
5224

    
5225
    self.migration_info = migration_info = result.payload
5226

    
5227
    # Then switch the disks to master/master mode
5228
    self._EnsureSecondary(target_node)
5229
    self._GoStandalone()
5230
    self._GoReconnect(True)
5231
    self._WaitUntilSync()
5232

    
5233
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
5234
    result = self.rpc.call_accept_instance(target_node,
5235
                                           instance,
5236
                                           migration_info,
5237
                                           self.nodes_ip[target_node])
5238

    
5239
    msg = result.fail_msg
5240
    if msg:
5241
      logging.error("Instance pre-migration failed, trying to revert"
5242
                    " disk status: %s", msg)
5243
      self.feedback_fn("Pre-migration failed, aborting")
5244
      self._AbortMigration()
5245
      self._RevertDiskStatus()
5246
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5247
                               (instance.name, msg))
5248

    
5249
    self.feedback_fn("* migrating instance to %s" % target_node)
5250
    time.sleep(10)
5251
    result = self.rpc.call_instance_migrate(source_node, instance,
5252
                                            self.nodes_ip[target_node],
5253
                                            self.live)
5254
    msg = result.fail_msg
5255
    if msg:
5256
      logging.error("Instance migration failed, trying to revert"
5257
                    " disk status: %s", msg)
5258
      self.feedback_fn("Migration failed, aborting")
5259
      self._AbortMigration()
5260
      self._RevertDiskStatus()
5261
      raise errors.OpExecError("Could not migrate instance %s: %s" %
5262
                               (instance.name, msg))
5263
    time.sleep(10)
5264

    
5265
    instance.primary_node = target_node
5266
    # distribute new instance config to the other nodes
5267
    self.cfg.Update(instance, self.feedback_fn)
5268

    
5269
    result = self.rpc.call_finalize_migration(target_node,
5270
                                              instance,
5271
                                              migration_info,
5272
                                              True)
5273
    msg = result.fail_msg
5274
    if msg:
5275
      logging.error("Instance migration succeeded, but finalization failed:"
5276
                    " %s", msg)
5277
      raise errors.OpExecError("Could not finalize instance migration: %s" %
5278
                               msg)
5279

    
5280
    self._EnsureSecondary(source_node)
5281
    self._WaitUntilSync()
5282
    self._GoStandalone()
5283
    self._GoReconnect(False)
5284
    self._WaitUntilSync()
5285

    
5286
    self.feedback_fn("* done")
5287

    
5288
  def Exec(self, feedback_fn):
5289
    """Perform the migration.
5290

5291
    """
5292
    feedback_fn("Migrating instance %s" % self.instance.name)
5293

    
5294
    self.feedback_fn = feedback_fn
5295

    
5296
    self.source_node = self.instance.primary_node
5297
    self.target_node = self.instance.secondary_nodes[0]
5298
    self.all_nodes = [self.source_node, self.target_node]
5299
    self.nodes_ip = {
5300
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5301
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5302
      }
5303

    
5304
    if self.cleanup:
5305
      return self._ExecCleanup()
5306
    else:
5307
      return self._ExecMigration()
5308

    
5309

    
5310
def _CreateBlockDev(lu, node, instance, device, force_create,
5311
                    info, force_open):
5312
  """Create a tree of block devices on a given node.
5313

5314
  If this device type has to be created on secondaries, create it and
5315
  all its children.
5316

5317
  If not, just recurse to children keeping the same 'force' value.
5318

5319
  @param lu: the lu on whose behalf we execute
5320
  @param node: the node on which to create the device
5321
  @type instance: L{objects.Instance}
5322
  @param instance: the instance which owns the device
5323
  @type device: L{objects.Disk}
5324
  @param device: the device to create
5325
  @type force_create: boolean
5326
  @param force_create: whether to force creation of this device; this
5327
      will be change to True whenever we find a device which has
5328
      CreateOnSecondary() attribute
5329
  @param info: the extra 'metadata' we should attach to the device
5330
      (this will be represented as a LVM tag)
5331
  @type force_open: boolean
5332
  @param force_open: this parameter will be passes to the
5333
      L{backend.BlockdevCreate} function where it specifies
5334
      whether we run on primary or not, and it affects both
5335
      the child assembly and the device own Open() execution
5336

5337
  """
5338
  if device.CreateOnSecondary():
5339
    force_create = True
5340

    
5341
  if device.children:
5342
    for child in device.children:
5343
      _CreateBlockDev(lu, node, instance, child, force_create,
5344
                      info, force_open)
5345

    
5346
  if not force_create:
5347
    return
5348

    
5349
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5350

    
5351

    
5352
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5353
  """Create a single block device on a given node.
5354

5355
  This will not recurse over children of the device, so they must be
5356
  created in advance.
5357

5358
  @param lu: the lu on whose behalf we execute
5359
  @param node: the node on which to create the device
5360
  @type instance: L{objects.Instance}
5361
  @param instance: the instance which owns the device
5362
  @type device: L{objects.Disk}
5363
  @param device: the device to create
5364
  @param info: the extra 'metadata' we should attach to the device
5365
      (this will be represented as a LVM tag)
5366
  @type force_open: boolean
5367
  @param force_open: this parameter will be passes to the
5368
      L{backend.BlockdevCreate} function where it specifies
5369
      whether we run on primary or not, and it affects both
5370
      the child assembly and the device own Open() execution
5371

5372
  """
5373
  lu.cfg.SetDiskID(device, node)
5374
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5375
                                       instance.name, force_open, info)
5376
  result.Raise("Can't create block device %s on"
5377
               " node %s for instance %s" % (device, node, instance.name))
5378
  if device.physical_id is None:
5379
    device.physical_id = result.payload
5380

    
5381

    
5382
def _GenerateUniqueNames(lu, exts):
5383
  """Generate a suitable LV name.
5384

5385
  This will generate a logical volume name for the given instance.
5386

5387
  """
5388
  results = []
5389
  for val in exts:
5390
    new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5391
    results.append("%s%s" % (new_id, val))
5392
  return results
5393

    
5394

    
5395
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  @param lu: the logical unit providing C{cfg} and C{proc}
  @param primary: first element of the drbd logical_id
  @param secondary: second element of the drbd logical_id
  @param size: size used for the data volume and for the drbd device
  @param names: sequence whose first two entries name the data and the
      metadata volume, in this order
  @param iv_name: the iv_name of the drbd device
  @param p_minor: fourth element of the drbd logical_id
  @param s_minor: fifth element of the drbd logical_id
  @return: the L{objects.Disk} describing the drbd device

  """
  cfg = lu.cfg
  port = cfg.AllocatePort()
  vgname = cfg.GetVGName()
  secret = cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgname, names[0]))
  # the metadata volume has a fixed size of 128
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgname, names[1]))
  drbd_id = (primary, secondary, port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
5414

    
5415

    
5416
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Build the list of disk objects for a given disk template.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: one of the C{constants.DT_*} disk templates
  @param instance_name: passed to the DRBD minor allocation
  @param primary_node: primary node, used for drbd8 disks
  @param secondary_nodes: must be empty for the plain and file
      templates and hold exactly one node for drbd8
  @param disk_info: sequence of dicts with "size" and "mode" keys, one
      per disk
  @param file_storage_dir: directory part of the file-based disk paths
  @param file_driver: first element of the file disks' logical_id
  @param base_index: index of the first generated disk
  @return: list of L{objects.Disk}, empty for the diskless template
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    pass

  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    lv_names = _GenerateUniqueNames(lu, suffixes)
    for idx, disk in enumerate(disk_info):
      disks.append(objects.Disk(dev_type=constants.LD_LV,
                                size=disk["size"],
                                logical_id=(vgname, lv_names[idx]),
                                iv_name="disk/%d" % (idx + base_index),
                                mode=disk["mode"]))

  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk, alternating primary and secondary node
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    suffixes = [".disk%d" % (base_index + i) for i in range(disk_count)]
    prefixes = _GenerateUniqueNames(lu, suffixes)
    for idx, disk in enumerate(disk_info):
      prefix = prefixes[idx]
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"],
                                      [prefix + "_data", prefix + "_meta"],
                                      "disk/%d" % (idx + base_index),
                                      minors[idx * 2], minors[idx * 2 + 1])
      drbd_dev.mode = disk["mode"]
      disks.append(drbd_dev)

  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) > 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      file_path = "%s/disk%d" % (file_storage_dir, disk_index)
      disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                size=disk["size"],
                                iv_name="disk/%d" % disk_index,
                                logical_id=(file_driver, file_path),
                                mode=disk["mode"]))

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
5480

    
5481

    
5482
def _GetInstanceInfoText(instance):
5483
  """Compute that text that should be added to the disk's metadata.
5484

5485
  """
5486
  return "originstname+%s" % instance.name
5487

    
5488

    
5489
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks of an instance.

  For the file disk template the storage directory is created first,
  on the primary (or overriding) node. Every disk not listed in
  C{to_skip} is then handed to L{_CreateBlockDev} for each node.

  This function has no return statement, so callers get None.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, the disks are created only on this
      node, which is then also treated as the primary node

  """
  metadata = _GetInstanceInfoText(instance)
  if target_node is not None:
    pnode = target_node
    nodes = [target_node]
  else:
    pnode = instance.primary_node
    nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    skipped = to_skip and idx in to_skip
    if skipped:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in nodes:
      # creation and opening are only forced on the primary node
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, metadata,
                      on_primary)
5532

    
5533

    
5534
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For the file disk template the storage directory is removed as well,
  on the same node the disks were removed from.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal; False as soon as a single block
      device or the storage directory could not be removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only the top-level device on the overriding node is handled
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the call was sent to, which is not the primary
      # node when target_node overrides it
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
5581

    
5582

    
5583
def _ComputeDiskSize(disk_template, disks):
  """Compute the space a set of disks needs in the volume group.

  @param disk_template: one of the C{constants.DT_*} disk templates
  @param disks: iterable of dicts with a "size" key
  @return: the summed size for the plain template, the summed size plus
      128 per disk for drbd8, and None for the diskless and file
      templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_size = sum(d["size"] for d in disks)
  # drbd needs another 128 MB per disk for its metadata
  drbd_size = sum(d["size"] + 128 for d in disks)

  requirements = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_size,
    constants.DT_DRBD8: drbd_size,
    constants.DT_FILE: None,
  }

  try:
    return requirements[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)
5601

    
5602

    
5603
def _CheckHVParams(lu, nodenames, hvname, hvparams):
5604
  """Hypervisor parameter validation.
5605

5606
  This function abstract the hypervisor parameter validation to be
5607
  used in both instance create and instance modify.
5608

5609
  @type lu: L{LogicalUnit}
5610
  @param lu: the logical unit for which we check
5611
  @type nodenames: list
5612
  @param nodenames: the list of nodes on which we should check
5613
  @type hvname: string
5614
  @param hvname: the name of the hypervisor we should use
5615
  @type hvparams: dict
5616
  @param hvparams: the parameters which we need to check
5617
  @raise errors.OpPrereqError: if the parameters are not valid
5618

5619
  """
5620
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5621
                                                  hvname,
5622
                                                  hvparams)
5623
  for node in nodenames:
5624
    info = hvinfo[node]
5625
    if info.offline:
5626
      continue
5627
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5628

    
5629

    
5630
class LUCreateInstance(LogicalUnit):
  """Create an instance.

  Two creation modes are handled: C{constants.INSTANCE_CREATE}, which
  runs the OS create scripts, and C{constants.INSTANCE_IMPORT}, which
  imports the disk images of an existing export.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check arguments.

    Defaults C{name_check} to True when the opcode does not carry it.

    @raise errors.OpPrereqError: if an ip check is requested while the
        name check is disabled

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if not hasattr(self.op, "name_check"):
      self.op.name_check = True
    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do ip checks without a name check",
                                 errors.ECODE_INVAL)

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    @param node: the node name as given in the opcode
    @return: the name as expanded by the configuration
    @raise errors.OpPrereqError: if the configuration cannot expand the name

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node, errors.ECODE_NOENT)
    return node_full

  def _BuildNics(self, cluster):
    """Validate the NIC definitions of the opcode and build C{self.nics}.

    Explicitly given MAC addresses are reserved in the configuration;
    'auto' and 'generate' values are kept as they are for a later step.

    @param cluster: the cluster object, used for the default nic parameters
    @raise errors.OpPrereqError: if a NIC definition is invalid or its
        MAC address is already in use

    """
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        if not self.op.name_check:
          raise errors.OpPrereqError("IP address set to auto but name checks"
                                     " have been skipped. Aborting.",
                                     errors.ECODE_INVAL)
        # with name checks enabled, check_ip holds the resolved instance ip
        nic_ip = self.check_ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip,
                                     errors.ECODE_INVAL)
        nic_ip = ip

      # TODO: check the ip address for uniqueness
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address",
                                   errors.ECODE_INVAL)

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        mac = utils.NormalizeAndValidateMac(mac)

        try:
          self.cfg.ReserveMAC(mac, self.proc.GetECId())
        except errors.ReservationError:
          raise errors.OpPrereqError("MAC address %s already in use"
                                     " in cluster" % mac,
                                     errors.ECODE_NOTUNIQUE)

      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time", errors.ECODE_INVAL)
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
                                   errors.ECODE_INVAL)
      elif bridge:
        link = bridge

      # only the explicitly requested values are stored on the NIC; the
      # filled dict is used just for the syntax check
      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

  def _BuildDisks(self):
    """Validate the disk definitions of the opcode and build C{self.disks}.

    Each entry of C{self.disks} is a dict with the integer C{"size"} and
    the access C{"mode"} of one disk.

    @raise errors.OpPrereqError: if a disk has an invalid access mode or
        a missing or invalid size

    """
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode, errors.ECODE_INVAL)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
      try:
        size = int(size)
      except (TypeError, ValueError):
        # int() raises TypeError for values which are neither strings nor
        # numbers; report those like any other invalid size
        raise errors.OpPrereqError("Invalid disk size '%s'" % size,
                                   errors.ECODE_INVAL)
      self.disks.append({"size": size, "mode": mode})

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation. Along the way the
    cheap opcode checks are done and the filled hypervisor/backend
    parameters, the NIC objects and the disk definitions are computed.

    @raise errors.OpPrereqError: if any of the opcode parameters is invalid

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name",
                                 errors.ECODE_INVAL)

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)),
                                 errors.ECODE_STATE)

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp
    # check that we don't specify global parameters on an instance
    _CheckGlobalHvParams(self.op.hvparams)

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    if self.op.name_check:
      hostname1 = utils.GetHostInfo(self.op.instance_name)
      self.op.instance_name = instance_name = hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = hostname1.ip
    else:
      instance_name = self.op.instance_name
      self.check_ip = None

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name, errors.ECODE_EXISTS)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self._BuildNics(cluster)

    # disk checks/pre-build
    self._BuildDisks()

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    # Exec joins this directory below the cluster file storage directory,
    # hence an absolute path is refused here
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path must not be"
                                 " absolute", errors.ECODE_INVAL)

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given",
                                 errors.ECODE_INVAL)

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        # the export has to be searched for in CheckPrereq, so all nodes
        # are locked
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.",
                                     errors.ECODE_INVAL)
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      self.op.force_variant = getattr(self.op, "force_variant", False)

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    On success the primary node (and the secondary one, when the
    allocator requires two nodes) is stored in the opcode.

    @raise errors.OpPrereqError: if the allocator fails or returns a
        wrong number of nodes

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" %
                                 (self.op.iallocator, ial.info),
                                 errors.ECODE_NORES)
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes), errors.ECODE_FAULT)
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 utils.CommaJoin(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @return: tuple of (environment dict, node list, node list); the
        same node list is returned twice

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl

  def _CheckImportSource(self):
    """Locate and inspect the export used in import mode.

    If no source node was given, the export is searched for on all the
    locked nodes. The export information is then used to set the OS type
    and the list of disk images, and to reuse the exported MAC addresses
    when the instance keeps its old name.

    @raise errors.OpPrereqError: if the export cannot be found, has the
        wrong version or has more disks than the new instance
    @raise errors.ProgrammerError: if the export config lacks its
        main section

    """
    src_node = self.op.src_node
    src_path = self.op.src_path

    if src_node is None:
      locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
      exp_list = self.rpc.call_export_list(locked_nodes)
      found = False
      for node in exp_list:
        if exp_list[node].fail_msg:
          continue
        if src_path in exp_list[node].payload:
          found = True
          self.op.src_node = src_node = node
          self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                     src_path)
          break
      if not found:
        raise errors.OpPrereqError("No export found for relative path %s" %
                                    src_path, errors.ECODE_INVAL)

    _CheckNodeOnline(self, src_node)
    result = self.rpc.call_export_info(src_node, src_path)
    result.Raise("No export or invalid export found in dir %s" % src_path)

    export_info = objects.SerializableConfigParser.Loads(str(result.payload))
    if not export_info.has_section(constants.INISECT_EXP):
      raise errors.ProgrammerError("Corrupted export config",
                                   errors.ECODE_ENVIRON)

    ei_version = export_info.get(constants.INISECT_EXP, 'version')
    if (int(ei_version) != constants.EXPORT_VERSION):
      raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                 (ei_version, constants.EXPORT_VERSION),
                                 errors.ECODE_ENVIRON)

    # Check that the new instance doesn't have less disks than the export
    instance_disks = len(self.disks)
    export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
    if instance_disks < export_disks:
      raise errors.OpPrereqError("Not enough disks to import."
                                 " (instance: %d, export: %d)" %
                                 (instance_disks, export_disks),
                                 errors.ECODE_INVAL)

    self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
    disk_images = []
    for idx in range(export_disks):
      option = 'disk%d_dump' % idx
      if export_info.has_option(constants.INISECT_INS, option):
        # FIXME: are the old os-es, disk sizes, etc. useful?
        export_name = export_info.get(constants.INISECT_INS, option)
        image = os.path.join(src_path, export_name)
        disk_images.append(image)
      else:
        # keep the positions aligned with the disk indices
        disk_images.append(False)

    self.src_images = disk_images

    old_name = export_info.get(constants.INISECT_INS, 'name')
    # FIXME: int() here could throw a ValueError on broken exports
    exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
    if self.op.instance_name == old_name:
      for idx, nic in enumerate(self.nics):
        # idx is zero-based, so the export only describes the NICs with
        # idx < exp_nic_count
        if nic.mac == constants.VALUE_AUTO and idx < exp_nic_count:
          nic_mac_ini = 'nic%d_mac' % idx
          nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

  def CheckPrereq(self):
    """Check prerequisites.

    This inspects the export (in import mode), checks that the instance
    IP is not in use, generates the missing MAC addresses, runs the
    allocator if requested and verifies the nodes, the free disk space,
    the hypervisor parameters, the OS, the bridges and the free memory.

    @raise errors.OpPrereqError: if any of the prerequisites is not met

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances", errors.ECODE_STATE)

    if self.op.mode == constants.INSTANCE_IMPORT:
      self._CheckImportSource()

    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name),
                                   errors.ECODE_NOTUNIQUE)

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name, errors.ECODE_STATE)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node", errors.ECODE_INVAL)
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be the"
                                   " primary node.", errors.ECODE_INVAL)
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node, errors.ECODE_ENVIRON)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size),
                                     errors.ECODE_NORES)

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name),
                 prereq=True, ecode=errors.ECODE_INVAL)
    if not self.op.force_variant:
      _CheckOSVariant(result.payload, self.op.os_type)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    @param feedback_fn: function called with progress messages
    @return: the list of all nodes of the new instance
    @raise errors.OpExecError: if the disks cannot be created or end up
        degraded

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # the instance is added as down; it is marked up only right before
    # being started
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # the source node stays locked, the import below still uses it
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        # FIXME: pass debug option from opcode to backend
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False, 0)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        # FIXME: pass debug option from opcode to backend
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name, 0)
        # an import failure is only logged, it does not abort the creation
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
6263

    
6264

    
6265
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This LU is somewhat special: instead of connecting by itself, it
  returns the command line which has to be run on the master node in
  order to reach the console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate the name expansion and locking to the common helper.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node passes the online check.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    @param feedback_fn: not used by this LU
    @return: the ssh command line built for the console command
    @raise errors.OpExecError: if the instance is not in the list of
        instances reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    # ask only the primary node, and only for the instance's hypervisor
    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      raise errors.OpExecError("Instance %s is not running." % inst.name)

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    console_cmd = hv_class.GetShellCommandForConsole(inst,
                                                     cluster.FillHV(inst),
                                                     cluster.FillBE(inst))

    # build ssh cmdline
    return self.ssh.BuildCmd(pnode, "root", console_cmd, batch=True, tty=True)
6316

    
6317

    
6318
class LUReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  The replacement itself is done by a single TLReplaceDisks tasklet
  created in ExpandNames; this class only validates the opcode
  arguments, declares the locks and builds the hooks environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    # Both fields may be absent from the opcode; treat "absent" as None
    for optional in ("remote_node", "iallocator"):
      if not hasattr(self.op, optional):
        setattr(self.op, optional, None)

    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replacement tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE

    if self.op.iallocator is not None:
      self.needed_locks[node_level] = locking.ALL_SET

    elif self.op.remote_node is not None:
      expanded = self.cfg.ExpandNodeName(self.op.remote_node)
      if expanded is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node, errors.ECODE_NOENT)

      self.op.remote_node = expanded

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [expanded]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    else:
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False)
    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Declare the instance's node locks at the node level.

    Nothing is done for other levels, nor when the node level has
    already been set to locking.ALL_SET.

    """
    if level != locking.LEVEL_NODE:
      return

    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      return

    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    The node list holds the master node, the instance's primary node
    and, if one was given, the new secondary node. The same list object
    is returned for both node-list slots.

    """
    inst = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = inst.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if self.op.remote_node is not None:
      hook_nodes.append(self.op.remote_node)

    return env, hook_nodes, hook_nodes
6394

    
6395

    
6396
class LUEvacuateNode(LogicalUnit):
  """Relocate the secondary instances from a node.

  One TLReplaceDisks tasklet in constants.REPLACE_DISK_CHG mode is
  created for every instance returned by _GetNodeSecondaryInstances
  for the node being evacuated.

  """
  HPATH = "node-evacuate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    """Default the optional opcode fields and validate their combination.

    """
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
                                  self.op.remote_node,
                                  self.op.iallocator)

  def ExpandNames(self):
    """Expand node names, declare locks and create the tasklets.

    @raise errors.OpPrereqError: if the node to evacuate or the new
        secondary node is not known, or if neither an iallocator nor a
        new secondary node was given

    """
    # Expand into a local first: the error message must show the name the
    # user supplied, not the None returned for an unknown node.
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name,
                                 errors.ECODE_NOENT)

    self.op.node_name = node_name

    self.needed_locks = {}

    # Declare node locks
    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node, errors.ECODE_NOENT)

      self.op.remote_node = remote_node

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    else:
      raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)

    # Create tasklets for replacing disks for all secondary instances on this
    # node
    names = []
    tasklets = []

    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
      logging.debug("Replacing disks for instance %s", inst.name)
      names.append(inst.name)

      # The last argument (delay_iallocator) is True, so each tasklet runs
      # the second half of its prerequisite checks from Exec
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
                                self.op.iallocator, self.op.remote_node, [],
                                True)
      tasklets.append(replacer)

    self.tasklets = tasklets
    self.instance_names = names

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks at the node level.

    """
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master node and, if one was given, the new
    secondary node; the same list is returned for both node-list slots.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    if self.op.remote_node is not None:
      env["NEW_SECONDARY"] = self.op.remote_node
      nl.append(self.op.remote_node)

    return (env, nl, nl)
6489

    
6490

    
6491
class TLReplaceDisks(Tasklet):
6492
  """Replaces disks for an instance.
6493

6494
  Note: Locking is not within the scope of this class.
6495

6496
  """
6497
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator):
    """Store the replacement parameters and reset the runtime state.

    @param lu: the logical unit owning this tasklet
    @param instance_name: name of the instance whose disks are replaced
    @param mode: one of the constants.REPLACE_DISK_* values
    @param iallocator_name: iallocator used to pick a new secondary, or None
    @param remote_node: name of the new secondary node, or None
    @param disks: indices of the disks to replace
    @param delay_iallocator: if true, _CheckPrereq2 is run from Exec
        instead of from CheckPrereq

    """
    Tasklet.__init__(self, lu)

    # Caller-supplied parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator

    # Runtime state, filled in by CheckPrereq/_CheckPrereq2
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
6519

    
6520
  @staticmethod
  def CheckArguments(mode, remote_node, iallocator):
    """Validate the mode/remote node/iallocator combination.

    Helper for the users of this class.

    @param mode: one of the constants.REPLACE_DISK_* values
    @param remote_node: new secondary node, or None
    @param iallocator: iallocator name, or None
    @raise errors.OpPrereqError: if, when changing the secondary, not
        exactly one of remote_node/iallocator is given, or if either is
        given in any other mode

    """
    have_node = remote_node is not None
    have_iallocator = iallocator is not None

    if mode != constants.REPLACE_DISK_CHG:
      # Not replacing the secondary
      if have_node or have_iallocator:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " only be used when changing the"
                                   " secondary node", errors.ECODE_INVAL)
      return

    if not (have_node or have_iallocator):
      raise errors.OpPrereqError("When changing the secondary either an"
                                 " iallocator script must be used or the"
                                 " new node given", errors.ECODE_INVAL)

    if have_node and have_iallocator:
      raise errors.OpPrereqError("Give either the iallocator or the new"
                                 " secondary, not both", errors.ECODE_INVAL)
6541

    
6542
  @staticmethod
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
    """Ask an iallocator for the new secondary node of an instance.

    @param lu: logical unit providing cfg, rpc and LogInfo
    @param iallocator_name: name of the iallocator to run
    @param instance_name: name of the instance to relocate
    @param relocate_from: passed to IAllocator as relocate_from
    @return: the first node name returned by the iallocator
    @raise errors.OpPrereqError: if the run is not successful or the
        number of returned nodes differs from the required number

    """
    allocator = IAllocator(lu.cfg, lu.rpc,
                           mode=constants.IALLOCATOR_MODE_RELOC,
                           name=instance_name,
                           relocate_from=relocate_from)
    allocator.Run(iallocator_name)

    if not allocator.success:
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
                                 " %s" % (iallocator_name, allocator.info),
                                 errors.ECODE_NORES)

    if len(allocator.nodes) != allocator.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (iallocator_name,
                                  len(allocator.nodes),
                                  allocator.required_nodes),
                                 errors.ECODE_FAULT)

    chosen = allocator.nodes[0]
    lu.LogInfo("Selected new secondary for instance '%s': %s",
               instance_name, chosen)
    return chosen
6572

    
6573
  def _FindFaultyDisks(self, node_name):
    """Return _FindFaultyInstanceDisks' result for this instance on a node.

    @param node_name: node on which the instance's disks are examined

    """
    faulty = _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                      node_name, True)
    return faulty
6576

    
6577
  def CheckPrereq(self):
    """Check the prerequisites, first part.

    Loads the instance and verifies that it is DRBD8-based with exactly
    one secondary node. Unless delay_iallocator is set, the second part
    (_CheckPrereq2) is run right away.

    @raise errors.OpPrereqError: if the disk template or the number of
        secondary nodes is not the expected one

    """
    self.instance = inst = self.cfg.GetInstanceInfo(self.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.instance_name

    if inst.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
                                 " instances", errors.ECODE_INVAL)

    secondary_count = len(inst.secondary_nodes)
    if secondary_count != 1:
      raise errors.OpPrereqError("The instance has a strange layout,"
                                 " expected one secondary but found %d" %
                                 secondary_count,
                                 errors.ECODE_FAULT)

    if self.delay_iallocator:
      # The remaining checks will be done from Exec
      return

    self._CheckPrereq2()
6599

    
6600
  def _CheckPrereq2(self):
    """Check the prerequisites, second part.

    Logically this belongs to CheckPrereq. It was split off and is
    called from Exec when delay_iallocator is set, because during node
    evacuation the iallocator would otherwise only see an unmodified
    cluster model, not taking planned changes into account.

    Determines the new secondary (if any), the target/other/new nodes
    and the disks to replace, and records the secondary IP addresses of
    all involved nodes in self.node_secondary_ip.

    """
    inst = self.instance
    primary_node = inst.primary_node
    secondary_node = inst.secondary_nodes[0]

    if self.iallocator_name is not None:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       inst.name, inst.secondary_nodes)
    else:
      remote_node = self.remote_node

    if remote_node is None:
      self.remote_node_info = None
    else:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    if remote_node == primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    mode = self.mode

    # Explicit disk lists are not accepted in these two modes
    if self.disks and mode in (constants.REPLACE_DISK_AUTO,
                               constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if mode == constants.REPLACE_DISK_AUTO:
      primary_faulty = self._FindFaultyDisks(primary_node)
      secondary_faulty = self._FindFaultyDisks(secondary_node)

      if primary_faulty and secondary_faulty:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if primary_faulty:
        self.disks = primary_faulty
        self.target_node = primary_node
        self.other_node = secondary_node
        nodes_to_check = [primary_node, secondary_node]
      elif secondary_faulty:
        self.disks = secondary_faulty
        self.target_node = secondary_node
        self.other_node = primary_node
        nodes_to_check = [secondary_node, primary_node]
      else:
        # Nothing is faulty, hence nothing to replace or to check
        self.disks = []
        nodes_to_check = []

    else:
      # Non-automatic modes
      if mode == constants.REPLACE_DISK_PRI:
        self.target_node = primary_node
        self.other_node = secondary_node
        nodes_to_check = [primary_node, secondary_node]

      elif mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = primary_node
        nodes_to_check = [secondary_node, primary_node]

      elif mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = primary_node
        self.target_node = secondary_node
        nodes_to_check = [remote_node, primary_node]

        _CheckNodeNotDrained(self.lu, remote_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for checked in nodes_to_check:
      _CheckNodeOnline(self.lu, checked)

    # Check whether disks are valid
    for disk_index in self.disks:
      inst.FindDisk(disk_index)

    # Get secondary node IP addresses
    involved = [self.target_node, self.other_node, self.new_node]
    self.node_secondary_ip = \
      dict((name, self.cfg.GetNodeInfo(name).secondary_ip)
           for name in involved if name is not None)
6706

    
6707
  def Exec(self, feedback_fn):
    """Run the disk replacement.

    Dispatches to _ExecDrbd8Secondary when a new secondary node was
    selected and to _ExecDrbd8DiskOnly otherwise. If the instance is
    not marked up, its disks are started before and shut down after the
    replacement.

    @param feedback_fn: callback used to report progress messages
    @return: the handler's return value, or None if no disk needs
        replacement

    """
    if self.delay_iallocator:
      self._CheckPrereq2()

    if not self.disks:
      feedback_fn("No disks need replacement")
      return

    feedback_fn("Replacing disk(s) %s for %s" %
                (utils.CommaJoin(self.disks), self.instance.name))

    # Disks of a down instance must be activated for the replacement
    need_activation = not self.instance.admin_up
    if need_activation:
      _StartInstanceDisks(self.lu, self.instance, True)

    try:
      # Is the secondary node being replaced?
      if self.new_node is None:
        handler = self._ExecDrbd8DiskOnly
      else:
        handler = self._ExecDrbd8Secondary

      return handler(feedback_fn)

    finally:
      # Undo the activation done above, whatever the outcome
      if need_activation:
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6743

    
6744
  def _CheckVolumeGroup(self, nodes):
    """Verify that the configured volume group is listed on all nodes.

    @param nodes: names of the nodes to query
    @raise errors.OpExecError: if the RPC returns nothing or a node's
        payload does not contain the volume group

    """
    self.lu.LogInfo("Checking volume groups")

    wanted_vg = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    vg_lists = self.rpc.call_vg_list(nodes)
    if not vg_lists:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node_name in nodes:
      node_result = vg_lists[node_name]
      node_result.Raise("Error checking node %s" % node_name)
      if wanted_vg in node_result.payload:
        continue
      raise errors.OpExecError("Volume group '%s' not found on node %s" %
                               (wanted_vg, node_name))
6760

    
6761
  def _CheckDisksExistence(self, nodes):
    """Verify that every selected disk can be found on the given nodes.

    Only the disks whose index is in self.disks are checked.

    @param nodes: names of the nodes to query
    @raise errors.OpExecError: if the lookup fails or returns an empty
        payload

    """
    for disk_index, disk in enumerate(self.instance.disks):
      if disk_index in self.disks:
        for node_name in nodes:
          self.lu.LogInfo("Checking disk/%d on %s" % (disk_index, node_name))
          self.cfg.SetDiskID(disk, node_name)

          lookup = self.rpc.call_blockdev_find(node_name, disk)

          failure = lookup.fail_msg
          if not failure and lookup.payload:
            continue

          # An empty payload without an RPC error means "no such device"
          if not failure:
            failure = "disk not found"
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
                                   (disk_index, node_name, failure))
6779

    
6780
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
    """Run _CheckDiskConsistency on every selected disk for one node.

    Only the disks whose index is in self.disks are checked.

    @param node_name: node on which the disks are checked
    @param on_primary: forwarded to _CheckDiskConsistency
    @param ldisk: forwarded to _CheckDiskConsistency as ldisk
    @raise errors.OpExecError: if a check returns a false value

    """
    for disk_index, disk in enumerate(self.instance.disks):
      if disk_index in self.disks:
        self.lu.LogInfo("Checking disk/%d consistency on node %s" %
                        (disk_index, node_name))

        consistent = _CheckDiskConsistency(self.lu, disk, node_name,
                                           on_primary, ldisk=ldisk)
        if not consistent:
          raise errors.OpExecError("Node %s has degraded storage, unsafe to"
                                   " replace disks for instance %s" %
                                   (node_name, self.instance.name))
6793

    
6794
  def _CreateNewStorage(self, node_name):
    """Create new data and meta LVs for every selected disk.

    Only the disks whose index is in self.disks are handled.

    @param node_name: node on which the new LVs are created
    @return: dict mapping each disk's iv_name to a tuple
        (disk, old children, [new data LV, new meta LV])

    """
    vg_name = self.cfg.GetVGName()
    replacements = {}

    for disk_index, disk in enumerate(self.instance.disks):
      if disk_index in self.disks:
        self.lu.LogInfo("Adding storage on %s for disk/%d" %
                        (node_name, disk_index))

        self.cfg.SetDiskID(disk, node_name)

        suffixes = [".disk%d_data" % disk_index, ".disk%d_meta" % disk_index]
        unique_names = _GenerateUniqueNames(self.lu, suffixes)

        # The data LV has the size of the disk, the meta LV a fixed size
        data_lv = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                               logical_id=(vg_name, unique_names[0]))
        meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                               logical_id=(vg_name, unique_names[1]))

        created = [data_lv, meta_lv]
        replacements[disk.iv_name] = (disk, disk.children, created)

        # we pass force_create=True to force the LVM creation
        for lv in created:
          _CreateBlockDev(self.lu, node_name, self.instance, lv, True,
                          _GetInstanceInfoText(self.instance), False)

    return replacements
6824

    
6825
  def _CheckDevices(self, node_name, iv_names):
    """Verify that the given devices are found and not degraded.

    @param node_name: node on which the devices are looked up
    @param iv_names: dict whose values are tuples starting with the
        device; the keys are only used in error messages
    @raise errors.OpExecError: if a device cannot be found or its
        payload reports is_degraded

    """
    for label, entry in iv_names.iteritems():
      device = entry[0]
      self.cfg.SetDiskID(device, node_name)

      lookup = self.rpc.call_blockdev_find(node_name, device)

      failure = lookup.fail_msg
      if failure or not lookup.payload:
        # An empty payload without an RPC error means "no such device"
        if not failure:
          failure = "disk not found"
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
                                 (label, failure))

      if lookup.payload.is_degraded:
        raise errors.OpExecError("DRBD device %s is degraded!" % label)
6840

    
6841
  def _RemoveOldStorage(self, node_name, iv_names):
    """Remove the old logical volumes, warning about failures.

    A failed removal only produces a warning; it does not abort.

    @param node_name: node from which the volumes are removed
    @param iv_names: dict whose values are tuples with the old volumes
        in second position; the keys are only used in log messages

    """
    for label, entry in iv_names.iteritems():
      obsolete_lvs = entry[1]
      self.lu.LogInfo("Remove logical volumes for %s" % label)

      for volume in obsolete_lvs:
        self.cfg.SetDiskID(volume, node_name)

        removal = self.rpc.call_blockdev_remove(node_name, volume)
        failure = removal.fail_msg
        if failure:
          self.lu.LogWarning("Can't remove old LV: %s" % failure,
                             hint="remove unused LVs manually")
6852

    
6853
  def _ExecDrbd8DiskOnly(self, feedback_fn):
    """Replace a disk on the primary or secondary for DRBD 8.

    The algorithm for replace is quite complicated:

      1. for each disk to be replaced:

        1. create new LVs on the target node with unique names
        1. detach old LVs from the drbd device
        1. rename old LVs to name_replaced-<time_t>
        1. rename new LVs to old LVs
        1. attach the new LVs (with the old names now) to the drbd device

      1. wait for sync across all devices

      1. for each modified disk:

        1. remove old LVs (which have the name name_replaced-<time_t>)

    Failures are not very well handled.

    @param feedback_fn: feedback callback, passed on to self.cfg.Update
    @raise errors.OpExecError: if attaching the new LVs to the drbd
        device fails (other failures are raised by the called helpers)

    """
    steps_total = 6

    def _ReplacedName(disk, suffix):
      """Compute the (vg, name) pair an old LV is renamed to."""
      return (disk.physical_id[0],
              disk.physical_id[1] + "_replaced-%s" % suffix)

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.other_node, self.target_node])
    self._CheckVolumeGroup([self.target_node, self.other_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.other_node,
                                self.other_node == self.instance.primary_node,
                                False)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    iv_names = self._CreateNewStorage(self.target_node)

    # Step: for each lv, detach+rename*2+attach
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    for dev, old_lvs, new_lvs in iv_names.itervalues():
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)

      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
      result.Raise("Can't detach drbd from local storage on node"
                   " %s for device %s" % (self.target_node, dev.iv_name))

      # ok, we created the new LVs, so now we know we have the needed
      # storage; as such, we proceed on the target node to rename
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
      # using the assumption that logical_id == physical_id (which in
      # turn is the unique_id on that node)

      # FIXME(iustin): use a better name for the replaced LVs
      temp_suffix = int(time.time())

      # Build the rename list based on what LVs exist on the node
      rename_old_to_new = []
      for to_ren in old_lvs:
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
        if not result.fail_msg and result.payload:
          # device exists
          rename_old_to_new.append((to_ren,
                                    _ReplacedName(to_ren, temp_suffix)))

      self.lu.LogInfo("Renaming the old LVs on the target node")
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
      result.Raise("Can't rename old LVs on node %s" % self.target_node)

      # Now we rename the new LVs to the old LVs
      self.lu.LogInfo("Renaming the new LVs on the target node")
      rename_new_to_old = [(new, old.physical_id)
                           for old, new in zip(old_lvs, new_lvs)]
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
      result.Raise("Can't rename new LVs on node %s" % self.target_node)

      # Make the in-memory objects follow the renames done on the node:
      # the new LVs take over the old IDs ...
      for old, new in zip(old_lvs, new_lvs):
        new.logical_id = old.logical_id
        self.cfg.SetDiskID(new, self.target_node)

      # ... and the old LVs get the "_replaced-<suffix>" names
      for disk in old_lvs:
        disk.logical_id = _ReplacedName(disk, temp_suffix)
        self.cfg.SetDiskID(disk, self.target_node)

      # Now that the new lvs have the old name, we can add them to the device
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
      msg = result.fail_msg
      if msg:
        # Best-effort rollback: try to remove the LVs we have just created
        for new_lv in new_lvs:
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
          if msg2:
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
                               hint=("cleanup manually the unused logical"
                                     " volumes"))
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)

      dev.children = new_lvs

      self.cfg.Update(self.instance, feedback_fn)

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
6975

    
6976
  def _ExecDrbd8Secondary(self, feedback_fn):
    """Replace the secondary node for DRBD 8.

    The algorithm for replace is quite complicated:
      - for all disks of the instance:
        - create new LVs on the new node with same names
        - shutdown the drbd device on the old secondary
        - disconnect the drbd network on the primary
        - create the drbd device on the new secondary
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the good local disks but
          not network enabled
      - wait for sync across all devices
      - remove all disks from the old secondary

    Failures are not very well handled.

    @param feedback_fn: feedback callback, passed on to self.cfg.Update
    @raise errors.OpExecError: if the primary's drbd devices cannot be
        detached from the network (other failures are raised by the
        called helpers)

    """
    steps_total = 6

    # Step: check device activation
    self.lu.LogStep(1, steps_total, "Check device existence")
    self._CheckDisksExistence([self.instance.primary_node])
    self._CheckVolumeGroup([self.instance.primary_node])

    # Step: check other node consistency
    self.lu.LogStep(2, steps_total, "Check peer consistency")
    self._CheckDisksConsistency(self.instance.primary_node, True, True)

    # Step: create new storage
    self.lu.LogStep(3, steps_total, "Allocate new storage")
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
                      (self.new_node, idx))
      # we pass force_create=True to force LVM creation
      for new_lv in dev.children:
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
                        _GetInstanceInfoText(self.instance), False)

    # Step 4: drbd minors and drbd setups changes
    # after this, we must manually remove the drbd minors on both the
    # error and the success paths
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
    # One minor on the new node is requested per instance disk
    minors = self.cfg.AllocateDRBDMinor([self.new_node] *
                                        len(self.instance.disks),
                                        self.instance.name)
    logging.debug("Allocated minors %r", minors)

    iv_names = {}
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
                      (self.new_node, idx))
      # create new devices on new_node; note that we create two IDs:
      # one without port, so the drbd will be activated without
      # networking information on the new node at this stage, and one
      # with network, for the later network attach
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
      if self.instance.primary_node == o_node1:
        p_minor = o_minor1
      else:
        assert self.instance.primary_node == o_node2, "Three-node instance?"
        p_minor = o_minor2

      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)

      iv_names[idx] = (dev, dev.children, new_net_id)
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
                    new_net_id)
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
                              logical_id=new_alone_id,
                              children=dev.children,
                              size=dev.size)
      try:
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
                              _GetInstanceInfoText(self.instance), False)
      except errors.GenericError:
        # Give back the minors allocated above before propagating the error
        self.cfg.ReleaseDRBDMinors(self.instance.name)
        raise

    # We have new devices, shutdown the drbd on the old secondary
    for idx, dev in enumerate(self.instance.disks):
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
      self.cfg.SetDiskID(dev, self.target_node)
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
      if msg:
        # Only warn: a failed shutdown does not stop the replacement
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
                           " node: %s" % (idx, msg),
                           hint=("Please cleanup this device manually as"
                                 " soon as possible"))

    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
    disconnect_results = \
      self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                        self.node_secondary_ip,
                                        self.instance.disks)
    result = disconnect_results[self.instance.primary_node]

    msg = result.fail_msg
    if msg:
      # detaches didn't succeed (unlikely)
      self.cfg.ReleaseDRBDMinors(self.instance.name)
      raise errors.OpExecError("Can't detach the disks from the network on"
                               " old node: %s" % (msg,))

    # if we managed to detach at least one, we update all the disks of
    # the instance to point to the new secondary
    self.lu.LogInfo("Updating instance configuration")
    for dev, _, new_logical_id in iv_names.itervalues():
      dev.logical_id = new_logical_id
      self.cfg.SetDiskID(dev, self.instance.primary_node)

    self.cfg.Update(self.instance, feedback_fn)

    # and now perform the drbd attach
    self.lu.LogInfo("Attaching primary drbds to new secondary"
                    " (standalone => connected)")
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
                                           self.instance.disks,
                                           self.instance.name,
                                           False)
    for to_node, to_result in result.items():
      msg = to_result.fail_msg
      if msg:
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
                           to_node, msg,
                           hint=("please do a gnt-instance info to see the"
                                 " status of disks"))

    # Wait for sync
    # This can fail as the old devices are degraded and _WaitForSync
    # does a combined result over all disks, so we don't check its return value
    self.lu.LogStep(5, steps_total, "Sync devices")
    _WaitForSync(self.lu, self.instance)

    # Check all devices manually
    self._CheckDevices(self.instance.primary_node, iv_names)

    # Step: remove old storage
    self.lu.LogStep(6, steps_total, "Removing old storage")
    self._RemoveOldStorage(self.target_node, iv_names)
7121

    
7122

    
7123
class LURepairNodeStorage(NoHooksLU):
7124
  """Repairs the volume group on a node.
7125

7126
  """
7127
  _OP_REQP = ["node_name"]
7128
  REQ_BGL = False
7129

    
7130
  def CheckArguments(self):
7131
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
7132
    if node_name is None:
7133
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name,
7134
                                 errors.ECODE_NOENT)
7135

    
7136
    self.op.node_name = node_name
7137

    
7138
  def ExpandNames(self):
7139
    self.needed_locks = {
7140
      locking.LEVEL_NODE: [self.op.node_name],
7141
      }
7142

    
7143
  def _CheckFaultyDisks(self, instance, node_name):
7144
    """Ensure faulty disks abort the opcode or at least warn."""
7145
    try:
7146
      if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7147
                                  node_name, True):
7148
        raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7149
                                   " node '%s'" % (instance.name, node_name),
7150
                                   errors.ECODE_STATE)
7151
    except errors.OpPrereqError, err:
7152
      if self.op.ignore_consistency:
7153
        self.proc.LogWarning(str(err.args[0]))
7154
      else:
7155
        raise
7156

    
7157
  def CheckPrereq(self):
7158
    """Check prerequisites.
7159

7160
    """
7161
    storage_type = self.op.storage_type
7162

    
7163
    if (constants.SO_FIX_CONSISTENCY not in
7164
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7165
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
7166
                                 " repaired" % storage_type,
7167
                                 errors.ECODE_INVAL)
7168

    
7169
    # Check whether any instance on this node has faulty disks
7170
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7171
      if not inst.admin_up:
7172
        continue
7173
      check_nodes = set(inst.all_nodes)
7174
      check_nodes.discard(self.op.node_name)
7175
      for inst_node_name in check_nodes:
7176
        self._CheckFaultyDisks(inst, inst_node_name)
7177

    
7178
  def Exec(self, feedback_fn):
7179
    feedback_fn("Repairing storage unit '%s' on %s ..." %
7180
                (self.op.name, self.op.node_name))
7181

    
7182
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
7183
    result = self.rpc.call_storage_execute(self.op.node_name,
7184
                                           self.op.storage_type, st_args,
7185
                                           self.op.name,
7186
                                           constants.SO_FIX_CONSISTENCY)
7187
    result.Raise("Failed to repair storage unit '%s' on %s" %
7188
                 (self.op.name, self.op.node_name))
7189

    
7190

    
7191
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are computed later in DeclareLocks."""
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """At node level, lock the nodes of the instance."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list returned (for both hook phases) holds the master node
    and the primary node of the instance.

    """
    env = {"DISK": self.op.disk, "AMOUNT": self.op.amount}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template can be grown and that every node
    reports enough free space in the volume group.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    inst_nodes = list(inst.all_nodes)
    for name in inst_nodes:
      _CheckNodeOnline(self, name)

    self.instance = inst

    growable_templates = (constants.DT_PLAIN, constants.DT_DRBD8)
    if inst.disk_template not in growable_templates:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    all_info = self.rpc.call_node_info(inst_nodes, self.cfg.GetVGName(),
                                       inst.hypervisor)
    for name in inst_nodes:
      node_res = all_info[name]
      node_res.Raise("Cannot get current information from node %s" % name)
      free_space = node_res.payload.get('vg_free', None)
      if not isinstance(free_space, int):
        raise errors.OpPrereqError("Can't compute free disk space on"
                                   " node %s" % name, errors.ECODE_ENVIRON)
      if self.op.amount > free_space:
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                   " %d MiB available, %d MiB required" %
                                   (name, free_space, self.op.amount),
                                   errors.ECODE_NORES)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The grow request is sent to each node of the instance in turn, then the
    new size is recorded in the configuration.

    """
    inst = self.instance
    target = self.disk
    for name in inst.all_nodes:
      self.cfg.SetDiskID(target, name)
      grow_res = self.rpc.call_blockdev_grow(name, target, self.op.amount)
      grow_res.Raise("Grow request failed to node %s" % name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    target.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)
    if not self.op.wait_for_sync:
      return
    if not _WaitForSync(self, inst):
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
7288

    
7289

    
7290
class LUQueryInstanceData(NoHooksLU):
  """Query runtime instance data.

  """
  _OP_REQP = ["instances", "static"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the requested instance names and declare shared locks.

    An empty instance list means all instances.

    @raise errors.OpPrereqError: if C{instances} is not a list or one of
        the names is unknown

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    requested = self.op.instances
    if not isinstance(requested, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'",
                                 errors.ECODE_INVAL)

    if not requested:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      expanded = self.wanted_names = []
      for short_name in requested:
        long_name = self.cfg.ExpandInstanceName(short_name)
        if long_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % short_name,
                                     errors.ECODE_NOENT)
        expanded.append(long_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = expanded

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """At node level, lock the nodes of the locked instances."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the wanted instance names (falling back to the acquired
    instance locks when no names were given) into instance objects.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    get_info = self.cfg.GetInstanceInfo
    self.wanted_instances = [get_info(name) for name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @return: None for static queries, when no node is given, when the node
        is offline or when the node returns no data; otherwise a tuple of
        (dev_path, major, minor, sync_percent, estimated_time,
        is_degraded, ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    lookup = self.rpc.call_blockdev_find(node, dev)
    if lookup.offline:
      return None

    lookup.Raise("Can't compute disk status for %s" % instance_name)

    bdev = lookup.payload
    if bdev is not None:
      return (bdev.dev_path, bdev.major, bdev.minor,
              bdev.sync_percent, bdev.estimated_time,
              bdev.is_degraded, bdev.ldisk_status)

    return None

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Builds a dict describing C{dev}, its status on the primary and on the
    secondary node, and (recursively) its children.

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in):
      # it is whichever of the first two logical_id entries is not the
      # primary node
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    child_data = [self._ComputeDiskStatus(instance, snode, child)
                  for child in (dev.children or [])]

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": child_data,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @rtype: dict
    @return: a dict mapping each instance name to a dict of its data

    """
    cluster = self.cfg.GetClusterInfo()
    collected = {}

    for inst in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        live = self.rpc.call_instance_info(inst.primary_node,
                                           inst.name,
                                           inst.hypervisor)
        live.Raise("Error checking node %s" % inst.primary_node)
        live_data = live.payload
        if live_data and "state" in live_data:
          run_state = "up"
        else:
          run_state = "down"

      if inst.admin_up:
        cfg_state = "up"
      else:
        cfg_state = "down"

      disk_data = [self._ComputeDiskStatus(inst, None, device)
                   for device in inst.disks]

      collected[inst.name] = {
        "name": inst.name,
        "config_state": cfg_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disks": disk_data,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return collected
7448

    
7449

    
7450
class LUSetInstanceParams(LogicalUnit):
7451
  """Modifies an instances's parameters.
7452

7453
  """
7454
  HPATH = "instance-modify"
7455
  HTYPE = constants.HTYPE_INSTANCE
7456
  _OP_REQP = ["instance_name"]
7457
  REQ_BGL = False
7458

    
7459
  def CheckArguments(self):
7460
    if not hasattr(self.op, 'nics'):
7461
      self.op.nics = []
7462
    if not hasattr(self.op, 'disks'):
7463
      self.op.disks = []
7464
    if not hasattr(self.op, 'beparams'):
7465
      self.op.beparams = {}
7466
    if not hasattr(self.op, 'hvparams'):
7467
      self.op.hvparams = {}
7468
    self.op.force = getattr(self.op, "force", False)
7469
    if not (self.op.nics or self.op.disks or
7470
            self.op.hvparams or self.op.beparams):
7471
      raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
7472

    
7473
    if self.op.hvparams:
7474
      _CheckGlobalHvParams(self.op.hvparams)
7475

    
7476
    # Disk validation
7477
    disk_addremove = 0
7478
    for disk_op, disk_dict in self.op.disks:
7479
      if disk_op == constants.DDM_REMOVE:
7480
        disk_addremove += 1
7481
        continue
7482
      elif disk_op == constants.DDM_ADD:
7483
        disk_addremove += 1
7484
      else:
7485
        if not isinstance(disk_op, int):
7486
          raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
7487
        if not isinstance(disk_dict, dict):
7488
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7489
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7490

    
7491
      if disk_op == constants.DDM_ADD:
7492
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7493
        if mode not in constants.DISK_ACCESS_SET:
7494
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
7495
                                     errors.ECODE_INVAL)
7496
        size = disk_dict.get('size', None)
7497
        if size is None:
7498
          raise errors.OpPrereqError("Required disk parameter size missing",
7499
                                     errors.ECODE_INVAL)
7500
        try:
7501
          size = int(size)
7502
        except ValueError, err:
7503
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7504
                                     str(err), errors.ECODE_INVAL)
7505
        disk_dict['size'] = size
7506
      else:
7507
        # modification of disk
7508
        if 'size' in disk_dict:
7509
          raise errors.OpPrereqError("Disk size change not possible, use"
7510
                                     " grow-disk", errors.ECODE_INVAL)
7511

    
7512
    if disk_addremove > 1:
7513
      raise errors.OpPrereqError("Only one disk add or remove operation"
7514
                                 " supported at a time", errors.ECODE_INVAL)
7515

    
7516
    # NIC validation
7517
    nic_addremove = 0
7518
    for nic_op, nic_dict in self.op.nics:
7519
      if nic_op == constants.DDM_REMOVE:
7520
        nic_addremove += 1
7521
        continue
7522
      elif nic_op == constants.DDM_ADD:
7523
        nic_addremove += 1
7524
      else:
7525
        if not isinstance(nic_op, int):
7526
          raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
7527
        if not isinstance(nic_dict, dict):
7528
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7529
          raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
7530

    
7531
      # nic_dict should be a dict
7532
      nic_ip = nic_dict.get('ip', None)
7533
      if nic_ip is not None:
7534
        if nic_ip.lower() == constants.VALUE_NONE:
7535
          nic_dict['ip'] = None
7536
        else:
7537
          if not utils.IsValidIP(nic_ip):
7538
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
7539
                                       errors.ECODE_INVAL)
7540

    
7541
      nic_bridge = nic_dict.get('bridge', None)
7542
      nic_link = nic_dict.get('link', None)
7543
      if nic_bridge and nic_link:
7544
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7545
                                   " at the same time", errors.ECODE_INVAL)
7546
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7547
        nic_dict['bridge'] = None
7548
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7549
        nic_dict['link'] = None
7550

    
7551
      if nic_op == constants.DDM_ADD:
7552
        nic_mac = nic_dict.get('mac', None)
7553
        if nic_mac is None:
7554
          nic_dict['mac'] = constants.VALUE_AUTO
7555

    
7556
      if 'mac' in nic_dict:
7557
        nic_mac = nic_dict['mac']
7558
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7559
          nic_mac = utils.NormalizeAndValidateMac(nic_mac)
7560

    
7561
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7562
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7563
                                     " modifying an existing nic",
7564
                                     errors.ECODE_INVAL)
7565

    
7566
    if nic_addremove > 1:
7567
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7568
                                 " supported at a time", errors.ECODE_INVAL)
7569

    
7570
  def ExpandNames(self):
    """Lock the instance; node locks are computed later in DeclareLocks."""
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []
7574

    
7575
  def DeclareLocks(self, level):
    """At node level, lock the nodes of the instance."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()
7578

    
7579
  def BuildHooksEnv(self):
    """Build hooks env.

    The new memory/vcpus values and the NIC list as it will look after the
    change are passed as overrides when building the instance environment.
    The node list returned holds the master and all nodes of the instance.

    """
    overrides = {}
    for be_key, env_key in ((constants.BE_MEMORY, 'memory'),
                            (constants.BE_VCPUS, 'vcpus')):
      if be_key in self.be_new:
        overrides[env_key] = self.be_new[be_key]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      nic_list = overrides['nics'] = []
      nic_changes = dict(self.op.nics)
      cluster_defaults = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        change = nic_changes.get(idx, {})
        ip = change.get('ip', nic.ip)
        mac = change.get('mac', nic.mac)
        if idx in self.nic_pnew:
          filled = self.nic_pnew[idx]
        else:
          filled = objects.FillDict(cluster_defaults, nic.nicparams)
        mode = filled[constants.NIC_MODE]
        link = filled[constants.NIC_LINK]
        nic_list.append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_changes:
        # the added NIC goes at the end of the list
        added = nic_changes[constants.DDM_ADD]
        ip = added.get('ip', None)
        mac = added['mac']
        filled = self.nic_pnew[constants.DDM_ADD]
        mode = filled[constants.NIC_MODE]
        link = filled[constants.NIC_LINK]
        nic_list.append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_changes:
        # a removal drops the last NIC
        del nic_list[-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance,
                                        override=overrides)
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes
7629

    
7630
  @staticmethod
  def _GetUpdatedParams(old_params, update_dict,
                        default_values, parameter_types):
    """Compute the new parameter dicts after applying an update.

    The old parameters are copied, not modified.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    new_params = copy.deepcopy(old_params)
    for name, value in update_dict.iteritems():
      if value == constants.VALUE_DEFAULT:
        # resetting to the default means dropping the explicit value, if any
        new_params.pop(name, None)
        continue
      new_params[name] = value
    utils.ForceDictType(new_params, parameter_types)
    filled_params = objects.FillDict(default_values, new_params)
    return (new_params, filled_params)
7662

    
7663
  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance and the cluster configuration, computes the new
    hypervisor, backend and NIC parameters and verifies that the requested
    NIC and disk changes can be applied to the instance.

    Non-fatal problems are collected in C{self.warn}. MAC addresses
    requested as "auto"/"generate" are replaced in the opcode by generated
    ones, explicitly given ones are reserved.

    @raise errors.OpPrereqError: if any of the requested changes is not
        acceptable

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    # be_new is always bound here: BE_MEMORY being in self.op.beparams
    # implies the beparams branch above was taken
    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem,
                                     errors.ECODE_NORES)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove",
                                     errors.ECODE_INVAL)
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if not instance.nics:
          raise errors.OpPrereqError("Invalid NIC index %s, instance has"
                                     " no NICs" % nic_op,
                                     errors.ECODE_INVAL)
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics) - 1),
                                     errors.ECODE_INVAL)
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      # a 'bridge' entry is treated as the link parameter
      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic', errors.ECODE_INVAL)
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None',
                                     errors.ECODE_INVAL)
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
        else:
          # or validate/reserve the current one
          try:
            self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
          except errors.ReservationError:
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac,
                                       errors.ECODE_NOTUNIQUE)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances",
                                 errors.ECODE_INVAL)
    for disk_op, _ in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance",
                                     errors.ECODE_INVAL)
        # disks can only be removed while the instance is not running on
        # its primary node
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg), errors.ECODE_ENVIRON)
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.", errors.ECODE_STATE)

      # the limit applies to the number of disks (not NICs) of the instance
      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS,
                                   errors.ECODE_STATE)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks) - 1),
                                     errors.ECODE_INVAL)
7871

    
7872
  def Exec(self, feedback_fn):
7873
    """Modifies an instance.
7874

7875
    All parameters take effect only at the next restart of the instance.
7876

7877
    """
7878
    # Process here the warnings from CheckPrereq, as we don't have a
7879
    # feedback_fn there.
7880
    for warn in self.warn:
7881
      feedback_fn("WARNING: %s" % warn)
7882

    
7883
    result = []
7884
    instance = self.instance
7885
    # disk changes
7886
    for disk_op, disk_dict in self.op.disks:
7887
      if disk_op == constants.DDM_REMOVE:
7888
        # remove the last disk
7889
        device = instance.disks.pop()
7890
        device_idx = len(instance.disks)
7891
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7892
          self.cfg.SetDiskID(disk, node)
7893
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7894
          if msg:
7895
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7896
                            " continuing anyway", device_idx, node, msg)
7897
        result.append(("disk/%d" % device_idx, "remove"))
7898
      elif disk_op == constants.DDM_ADD:
7899
        # add a new disk
7900
        if instance.disk_template == constants.DT_FILE:
7901
          file_driver, file_path = instance.disks[0].logical_id
7902
          file_path = os.path.dirname(file_path)
7903
        else:
7904
          file_driver = file_path = None
7905
        disk_idx_base = len(instance.disks)
7906
        new_disk = _GenerateDiskTemplate(self,
7907
                                         instance.disk_template,
7908
                                         instance.name, instance.primary_node,
7909
                                         instance.secondary_nodes,
7910
                                         [disk_dict],
7911
                                         file_path,
7912
                                         file_driver,
7913
                                         disk_idx_base)[0]
7914
        instance.disks.append(new_disk)
7915
        info = _GetInstanceInfoText(instance)
7916

    
7917
        logging.info("Creating volume %s for instance %s",
7918
                     new_disk.iv_name, instance.name)
7919
        # Note: this needs to be kept in sync with _CreateDisks
7920
        #HARDCODE
7921
        for node in instance.all_nodes:
7922
          f_create = node == instance.primary_node
7923
          try:
7924
            _CreateBlockDev(self, node, instance, new_disk,
7925
                            f_create, info, f_create)
7926
          except errors.OpExecError, err:
7927
            self.LogWarning("Failed to create volume %s (%s) on"
7928
                            " node %s: %s",
7929
                            new_disk.iv_name, new_disk, node, err)
7930
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7931
                       (new_disk.size, new_disk.mode)))
7932
      else:
7933
        # change a given disk
7934
        instance.disks[disk_op].mode = disk_dict['mode']
7935
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7936
    # NIC changes
7937
    for nic_op, nic_dict in self.op.nics:
7938
      if nic_op == constants.DDM_REMOVE:
7939
        # remove the last nic
7940
        del instance.nics[-1]
7941
        result.append(("nic.%d" % len(instance.nics), "remove"))
7942
      elif nic_op == constants.DDM_ADD:
7943
        # mac and bridge should be set, by now
7944
        mac = nic_dict['mac']
7945
        ip = nic_dict.get('ip', None)
7946
        nicparams = self.nic_pinst[constants.DDM_ADD]
7947
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7948
        instance.nics.append(new_nic)
7949
        result.append(("nic.%d" % (len(instance.nics) - 1),
7950
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7951
                       (new_nic.mac, new_nic.ip,
7952
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7953
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7954
                       )))
7955
      else:
7956
        for key in 'mac', 'ip':
7957
          if key in nic_dict:
7958
            setattr(instance.nics[nic_op], key, nic_dict[key])
7959
        if nic_op in self.nic_pinst:
7960
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
7961
        for key, val in nic_dict.iteritems():
7962
          result.append(("nic.%s/%d" % (key, nic_op), val))
7963

    
7964
    # hvparams changes
7965
    if self.op.hvparams:
7966
      instance.hvparams = self.hv_inst
7967
      for key, val in self.op.hvparams.iteritems():
7968
        result.append(("hv/%s" % key, val))
7969

    
7970
    # beparams changes
7971
    if self.op.beparams:
7972
      instance.beparams = self.be_inst
7973
      for key, val in self.op.beparams.iteritems():
7974
        result.append(("be/%s" % key, val))
7975

    
7976
    self.cfg.Update(instance, feedback_fn)
7977

    
7978
    return result
7979

    
7980

    
7981
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    """Declare shared node locks, on all nodes or on the requested ones.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
      self.needed_locks[locking.LEVEL_NODE] = wanted
    else:
      # an empty node list means all the nodes
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    The nodes to query are the ones whose locks have been acquired.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes whose query failed map to False instead.

    """
    exports = {}
    for node, nres in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        exports[node] = False
        continue
      exports[node] = nres.payload

    return exports
8021

    
8022

    
8023
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    The opcode's shutdown_timeout is optional; when it is missing,
    constants.DEFAULT_SHUTDOWN_TIMEOUT is used.

    """
    default_timeout = constants.DEFAULT_SHUTDOWN_TIMEOUT
    self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
                                    default_timeout)

  def ExpandNames(self):
    """Lock the instance and all the nodes.

    """
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # For now all nodes have to be locked: we don't know where a previous
    # export might be, and this LU searches for it and removes it from
    # its current node. In the future this could be fixed by:
    #  - making a tasklet to search (share-lock all), then create the new
    #    one, then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # Nothing to do here, since all the nodes are locked anyway.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
      }
    instance_env = _BuildInstanceHookEnvByObject(self, self.instance)
    env.update(instance_env)
    # the same node list is used for both returned node lists
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    target_name = self.cfg.ExpandNodeName(self.op.target_node)
    self.dst_node = self.cfg.GetNodeInfo(target_name)

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node,
                                 errors.ECODE_NOENT)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type != constants.LD_FILE:
        continue
      raise errors.OpPrereqError("Export not supported for instances with"
                                 " file-based disks", errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    @rtype: tuple
    @return: (finalization status, list of per-disk export statuses)

    """
    inst = self.instance
    target = self.dst_node
    source = inst.primary_node

    if self.op.shutdown:
      # shutdown the instance, but not the disks
      feedback_fn("Shutting down instance %s" % inst.name)
      shutdown_res = self.rpc.call_instance_shutdown(source, inst,
                                                     self.shutdown_timeout)
      shutdown_res.Raise("Could not shutdown instance %s on"
                         " node %s" % (inst.name, source))

    vgname = self.cfg.GetVGName()

    # one entry per instance disk: a snapshot device, or False on failure
    snapshots = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in inst.disks:
      self.cfg.SetDiskID(disk, source)

    # when exporting a stopped instance, its disks are activated here
    # and deactivated again at the end
    disks_activated = not inst.admin_up
    if disks_activated:
      feedback_fn("Activating disks for %s" % inst.name)
      _StartInstanceDisks(self, inst, None)

    try:
      # per-disk results
      disk_results = []
      try:
        for idx, disk in enumerate(inst.disks):
          feedback_fn("Creating a snapshot of disk/%s on node %s" %
                      (idx, source))

          # the payload will be a snapshot of an lvm leaf of the disk we
          # passed
          snap_res = self.rpc.call_blockdev_snapshot(source, disk)
          fail_msg = snap_res.fail_msg
          if fail_msg:
            self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                            idx, source, fail_msg)
            snapshots.append(False)
            continue

          snap_id = (vgname, snap_res.payload)
          snapshots.append(objects.Disk(dev_type=constants.LD_LV,
                                        size=disk.size,
                                        logical_id=snap_id,
                                        physical_id=snap_id,
                                        iv_name=disk.iv_name))

      finally:
        # an instance we have shut down gets restarted in all cases
        if self.op.shutdown and inst.admin_up:
          feedback_fn("Starting instance %s" % inst.name)
          start_res = self.rpc.call_instance_start(source, inst, None, None)
          fail_msg = start_res.fail_msg
          if fail_msg:
            _ShutdownInstanceDisks(self, inst)
            raise errors.OpExecError("Could not start instance: %s" %
                                     fail_msg)

      # TODO: check for size

      cluster_name = self.cfg.GetClusterName()
      for idx, dev in enumerate(snapshots):
        feedback_fn("Exporting snapshot %s from %s to %s" %
                    (idx, source, target.name))
        if not dev:
          # the snapshot of this disk has failed earlier
          disk_results.append(False)
          continue

        # FIXME: pass debug from opcode to backend
        export_res = self.rpc.call_snapshot_export(source, dev, target.name,
                                                   inst, cluster_name,
                                                   idx, 0)
        fail_msg = export_res.fail_msg
        if fail_msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, source, target.name, fail_msg)
        disk_results.append(not fail_msg)

        # the snapshot is removed whether or not the export has worked
        fail_msg = self.rpc.call_blockdev_remove(source, dev).fail_msg
        if fail_msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, source, fail_msg)

      feedback_fn("Finalizing export on %s" % target.name)
      final_res = self.rpc.call_finalize_export(target.name, inst,
                                                snapshots)
      fail_msg = final_res.fail_msg
      if fail_msg:
        self.LogWarning("Could not finalize export for instance %s"
                        " on node %s: %s", inst.name, target.name, fail_msg)
      final_ok = not fail_msg

    finally:
      if disks_activated:
        feedback_fn("Deactivating disks for %s" % inst.name)
        _ShutdownInstanceDisks(self, inst)

    other_nodes = self.cfg.GetNodeList()
    other_nodes.remove(target.name)

    # on one-node clusters the list will be empty after the removal;
    # if we proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = inst.name
    if other_nodes:
      feedback_fn("Removing old exports for instance %s" % iname)
      old_exports = self.rpc.call_export_list(other_nodes)
      for node in old_exports:
        if old_exports[node].fail_msg:
          continue
        if iname not in old_exports[node].payload:
          continue
        fail_msg = self.rpc.call_export_remove(node, iname).fail_msg
        if fail_msg:
          self.LogWarning("Could not remove older export for instance %s"
                          " on node %s: %s", iname, node, fail_msg)
    return final_ok, disk_results
8222

    
8223

    
8224
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock all the nodes.

    """
    # All nodes must be locked for RemoveExport to work; the instance
    # itself needs no lock, as nothing will happen to it (and exports can
    # be removed also for an already removed instance)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # For an instance which is not found, the name is used as passed in.
    # This will only work if it was an FQDN, though.
    unknown_instance = not expanded_name
    if unknown_instance:
      iname = self.op.instance_name
    else:
      iname = expanded_name

    exports = self.rpc.call_export_list(self.acquired_locks[locking.LEVEL_NODE])
    found = False
    for node in exports:
      fail_msg = exports[node].fail_msg
      if fail_msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, fail_msg)
        continue
      if iname not in exports[node].payload:
        continue
      found = True
      fail_msg = self.rpc.call_export_remove(node, iname).fail_msg
      if fail_msg:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", iname, node, fail_msg)

    if unknown_instance and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
8275

    
8276

    
8277
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    """Expand the object name and lock the node or the instance.

    No lock is declared here for other kinds of tags.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      full_name = self.cfg.ExpandNodeName(self.op.name)
      if full_name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,), errors.ECODE_NOENT)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_NODE] = full_name
    elif kind == constants.TAG_INSTANCE:
      full_name = self.cfg.ExpandInstanceName(self.op.name)
      if full_name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,), errors.ECODE_NOENT)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_INSTANCE] = full_name

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the object the tags belong to and stores it in
    self.target; an unknown tag kind is rejected.

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)
    self.target = target
8314

    
8315

    
8316
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
8328

    
8329

    
8330
class LUSearchTags(NoHooksLU):
8331
  """Searches the tags for a given pattern.
8332

8333
  """
8334
  _OP_REQP = ["pattern"]
8335
  REQ_BGL = False
8336

    
8337
  def ExpandNames(self):
8338
    self.needed_locks = {}
8339

    
8340
  def CheckPrereq(self):
8341
    """Check prerequisites.
8342

8343
    This checks the pattern passed for validity by compiling it.
8344

8345
    """
8346
    try:
8347
      self.re = re.compile(self.op.pattern)
8348
    except re.error, err:
8349
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
8350
                                 (self.op.pattern, err), errors.ECODE_INVAL)
8351

    
8352
  def Exec(self, feedback_fn):
8353
    """Returns the tag list.
8354

8355
    """
8356
    cfg = self.cfg
8357
    tgts = [("/cluster", cfg.GetClusterInfo())]
8358
    ilist = cfg.GetAllInstancesInfo().values()
8359
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
8360
    nlist = cfg.GetAllNodesInfo().values()
8361
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
8362
    results = []
8363
    for path, target in tgts:
8364
      for tag in target.GetTags():
8365
        if self.re.search(tag):
8366
          results.append((path, tag))
8367
    return results
8368

    
8369

    
8370
class LUAddTags(TagsLU):
8371
  """Sets a tag on a given object.
8372

8373
  """
8374
  _OP_REQP = ["kind", "name", "tags"]
8375
  REQ_BGL = False
8376

    
8377
  def CheckPrereq(self):
8378
    """Check prerequisites.
8379

8380
    This checks the type and length of the tag name and value.
8381

8382
    """
8383
    TagsLU.CheckPrereq(self)
8384
    for tag in self.op.tags:
8385
      objects.TaggableObject.ValidateTag(tag)
8386

    
8387
  def Exec(self, feedback_fn):
8388
    """Sets the tag.
8389

8390
    """
8391
    try:
8392
      for tag in self.op.tags:
8393
        self.target.AddTag(tag)
8394
    except errors.TagError, err:
8395
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
8396
    self.cfg.Update(self.target, feedback_fn)
8397

    
8398

    
8399
class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the tags are valid and that the target currently
    has all of them.

    """
    TagsLU.CheckPrereq(self)
    validate = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate(tag)

    wanted = frozenset(self.op.tags)
    present = self.target.GetTags()
    if wanted <= present:
      return

    # report the tags which are not set on the target, in sorted order
    missing = sorted(["'%s'" % tag for tag in wanted - present])
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (",".join(missing)), errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)
    self.cfg.Update(target, feedback_fn)
8431

    
8432

    
8433
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return
    # _GetWantedNodes can be used here, but using it this way in
    # ExpandNames is not always appropriate. See the docstring of
    # LogicalUnit.ExpandNames for more information.
    wanted = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = wanted
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def CheckPrereq(self):
    """Check prerequisites.

    There is nothing to check for this LU.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    duration = self.op.duration
    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      delay_results = self.rpc.call_test_delay(self.op.on_nodes,
                                               self.op.duration)
      for node, nres in delay_results.items():
        nres.Raise("Failure during rpc call to node %s" % node)
8473

    
8474

    
8475
class IAllocator(object):
8476
  """IAllocator framework.
8477

8478
  An IAllocator instance has three sets of attributes:
8479
    - cfg that is needed to query the cluster
8480
    - input data (all members of the _KEYS class attribute are required)
8481
    - four buffer attributes (in|out_data|text), that represent the
8482
      input (to the external script) in text and data structure format,
8483
      and the output from it, again in two formats
8484
    - the result variables from the script (success, info, nodes) for
8485
      easy usage
8486

8487
  """
8488
  # pylint: disable-msg=R0902
8489
  # lots of instance attributes
8490
  _ALLO_KEYS = [
8491
    "mem_size", "disks", "disk_template",
8492
    "os", "tags", "nics", "vcpus", "hypervisor",
8493
    ]
8494
  _RELO_KEYS = [
8495
    "relocate_from",
8496
    ]
8497

    
8498
  def __init__(self, cfg, rpc, mode, name, **kwargs):
    """Initializes the allocator and builds its input data.

    @param cfg: the configuration object used to query the cluster
    @param rpc: the object used for the RPC calls to the nodes
    @param mode: one of constants.IALLOCATOR_MODE_ALLOC or
        constants.IALLOCATOR_MODE_RELOC
    @param name: stored as self.name
    @param kwargs: the input fields; they must be exactly the keys
        required by the mode (_ALLO_KEYS or _RELO_KEYS)
    @raise errors.ProgrammerError: for an unknown mode and for invalid or
        missing input fields

    """
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = None
    self.out_text = None
    self.in_data = None
    self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = None
    self.disks = None
    self.disk_template = None
    self.os = None
    self.tags = None
    self.nics = None
    self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = None
    self.info = None
    self.nodes = None

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      required_keys = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      required_keys = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)

    # only known fields are accepted...
    for key, value in kwargs.items():
      if key not in required_keys:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, value)
    # ...and all of them must have been passed
    for key in required_keys:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()
8531

    
8532
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation: the
    cluster-level values, the per-node data and the per-instance data.
    The result is stored in self.in_data.

    @raise errors.OpExecError: if a node which is neither offline nor
        drained doesn't return a needed attribute, or returns a
        non-integer value for it

    """
    cfg = self.cfg
    cluster = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster.GetTags()),
      "enabled_hypervisors": list(cluster.enabled_hypervisors),
      # we don't have job IDs
      }
    # pairs of (instance object, filled backend parameters)
    inst_list = [(inst, cluster.FillBE(inst))
                 for inst in cfg.GetAllInstancesInfo().values()]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = self.rpc.call_all_instances_info(node_list,
                                                  cluster.enabled_hypervisors)
    required_attrs = ['memory_total', 'memory_free', 'memory_dom0',
                      'vg_size', 'vg_free', 'cpu_total']
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      # the runtime values are added only for usable nodes
      if not ninfo.offline and not ninfo.drained:
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
        running = node_iinfo[nname].payload

        for attr in required_attrs:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))

        # compute memory used by primary instances
        pri_mem = 0
        pri_up_mem = 0
        for inst, be_params in inst_list:
          if inst.primary_node != nname:
            continue
          inst_mem = be_params[constants.BE_MEMORY]
          pri_mem += inst_mem
          if inst.name in running:
            used_mem = int(running[inst.name]['memory'])
          else:
            used_mem = 0
          # memory configured for the instance but not currently used by
          # it is not counted as free
          remote_info['memory_free'] -= max(0, inst_mem - used_mem)

          if inst.admin_up:
            pri_up_mem += inst_mem

        # dynamic (runtime) values
        pnr["total_memory"] = remote_info['memory_total']
        pnr["reserved_memory"] = remote_info['memory_dom0']
        pnr["free_memory"] = remote_info['memory_free']
        pnr["total_disk"] = remote_info['vg_size']
        pnr["free_disk"] = remote_info['vg_free']
        pnr["total_cpus"] = remote_info['cpu_total']
        pnr["i_pri_memory"] = pri_mem
        pnr["i_pri_up_memory"] = pri_up_mem

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    default_nicparams = cluster.nicparams[constants.PP_DEFAULT]
    instance_data = {}
    for inst, be_params in inst_list:
      nic_data = []
      for nic in inst.nics:
        filled = objects.FillDict(default_nicparams, nic.nicparams)
        nic_dict = {
          "mac": nic.mac,
          "ip": nic.ip,
          "mode": filled[constants.NIC_MODE],
          "link": filled[constants.NIC_LINK],
          }
        if filled[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          # for bridged NICs the link is also exported as "bridge"
          nic_dict["bridge"] = filled[constants.NIC_LINK]
        nic_data.append(nic_dict)

      disk_data = [{"size": dsk.size, "mode": dsk.mode}
                   for dsk in inst.disks]
      pir = {
        "tags": list(inst.GetTags()),
        "admin_up": inst.admin_up,
        "vcpus": be_params[constants.BE_VCPUS],
        "memory": be_params[constants.BE_MEMORY],
        "os": inst.os,
        "nodes": [inst.primary_node] + list(inst.secondary_nodes),
        "nics": nic_data,
        "disks": disk_data,
        "disk_template": inst.disk_template,
        "hypervisor": inst.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(inst.disk_template,
                                                 disk_data)
      instance_data[inst.name] = pir

    data["instances"] = instance_data

    self.in_data = data
8658

    
8659
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This stores the "allocate" request in self.in_data, which (as set by
    _ComputeClusterData) holds the generic part of the allocator input.
    It also sets self.required_nodes.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    # network-mirrored templates need two nodes, all the others one
    if self.disk_template not in constants.DTS_NET_MIRROR:
      self.required_nodes = 1
    else:
      self.required_nodes = 2

    data["request"] = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
8691

    
8692
  def _AddRelocateInstance(self):
    """Add the "relocate" request to the allocator input data.

    This complements the cluster data already stored in self.in_data
    (_BuildInputData calls _ComputeClusterData before this method), so
    that together they form the structure needed as input for the
    allocator.

    The instance named by self.name must exist in the configuration
    (otherwise a ProgrammerError is raised), must use a disk template
    listed in constants.DTS_NET_MIRROR and must have exactly one
    secondary node (otherwise an OpPrereqError is raised).

    As a side effect, self.required_nodes is set to one.

    The checks for the completeness of the opcode must have already been
    done.

    """
    inst = self.cfg.GetInstanceInfo(self.name)
    if inst is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances",
                                 errors.ECODE_INVAL)

    if len(inst.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node",
                                 errors.ECODE_STATE)

    self.required_nodes = 1

    # only the disk sizes are handed over for the disk space computation
    size_list = []
    for disk in inst.disks:
      size_list.append({'size': disk.size})
    total_space = _ComputeDiskSize(inst.disk_template, size_list)

    self.in_data["request"] = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": total_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
8727

    
8728
  def _BuildInputData(self):
    """Build the complete allocator input.

    The cluster data is computed first, then the mode-specific request
    is added to it (an allocation request for IALLOCATOR_MODE_ALLOC, a
    relocation request for any other mode); finally the whole structure
    is serialized into self.in_text.

    """
    self._ComputeClusterData()

    # pick the method adding the request which matches the current mode
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      add_request_fn = self._AddNewInstance
    else:
      add_request_fn = self._AddRelocateInstance
    add_request_fn()

    self.in_text = serializer.Dump(self.in_data)
8740

    
8741
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and store its output.

    The allocator called 'name' is invoked, through call_fn, on the
    master node with self.in_text as input; when call_fn is None, the
    call_iallocator_runner RPC call is used. A failed call is reported
    via the Raise method of the call result.

    The payload of the call is saved in self.out_text and, unless
    'validate' is false, checked and processed by _ValidateResult.
    Nothing is returned.

    """
    runner_fn = call_fn
    if runner_fn is None:
      runner_fn = self.rpc.call_iallocator_runner

    master = self.cfg.GetMasterNode()
    result = runner_fn(master, name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if not validate:
      return
    self._ValidateResult()
8754

    
8755
  def _ValidateResult(self):
8756
    """Process the allocator results.
8757

8758
    This will process and if successful save the result in
8759
    self.out_data and the other parameters.
8760

8761
    """
8762
    try:
8763
      rdict = serializer.Load(self.out_text)
8764
    except Exception, err:
8765
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8766

    
8767
    if not isinstance(rdict, dict):
8768
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
8769

    
8770
    for key in "success", "info", "nodes":
8771
      if key not in rdict:
8772
        raise errors.OpExecError("Can't parse iallocator results:"
8773
                                 " missing key '%s'" % key)
8774
      setattr(self, key, rdict[key])
8775

    
8776
    if not isinstance(rdict["nodes"], list):
8777
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8778
                               " is not a list")
8779
    self.out_data = rdict
8780

    
8781

    
8782
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the requested direction, this LU either returns the
  input text built for the allocator or runs the named allocator and
  returns its output without validating it.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def _CheckAllocParams(self):
    """Check the opcode parameters of an allocation test.

    All the instance attributes must be present on the opcode, the
    instance must not exist yet in the cluster, and the 'nics' and
    'disks' parameters must be lists of well-formed dictionaries. A
    missing or None hypervisor is replaced with the one returned by the
    configuration.

    """
    op = self.op
    for attr in ["name", "mem_size", "disks", "disk_template",
                 "os", "tags", "nics", "vcpus"]:
      if not hasattr(op, attr):
        raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                   attr, errors.ECODE_INVAL)

    existing = self.cfg.ExpandInstanceName(op.name)
    if existing is not None:
      raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                 existing, errors.ECODE_EXISTS)

    if not isinstance(op.nics, list):
      raise errors.OpPrereqError("Invalid parameter 'nics'",
                                 errors.ECODE_INVAL)
    for nic in op.nics:
      nic_ok = (isinstance(nic, dict) and
                "mac" in nic and
                "ip" in nic and
                "bridge" in nic)
      if not nic_ok:
        raise errors.OpPrereqError("Invalid contents of the 'nics'"
                                   " parameter", errors.ECODE_INVAL)

    if not isinstance(op.disks, list):
      raise errors.OpPrereqError("Invalid parameter 'disks'",
                                 errors.ECODE_INVAL)
    for disk in op.disks:
      disk_ok = (isinstance(disk, dict) and
                 "size" in disk and
                 isinstance(disk["size"], int) and
                 "mode" in disk and
                 disk["mode"] in ['r', 'w'])
      if not disk_ok:
        raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                   " parameter", errors.ECODE_INVAL)

    if not (hasattr(op, "hypervisor") and op.hypervisor is not None):
      op.hypervisor = self.cfg.GetHypervisorType()

  def _CheckRelocParams(self):
    """Check the opcode parameters of a relocation test.

    The instance name must be present and must expand to an existing
    instance; the opcode name is replaced with the expanded one and the
    secondary nodes of the instance are remembered in
    self.relocate_from.

    """
    op = self.op
    if not hasattr(op, "name"):
      raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
                                 errors.ECODE_INVAL)
    full_name = self.cfg.ExpandInstanceName(op.name)
    if full_name is None:
      raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                 op.name, errors.ECODE_NOENT)
    op.name = full_name
    self.relocate_from = self.cfg.GetInstanceInfo(full_name).secondary_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and
    mode of the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._CheckAllocParams()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      self._CheckRelocParams()
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # the allocator name is only needed when the allocator is run
      if not (hasattr(self.op, "allocator") and
              self.op.allocator is not None):
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    An IAllocator object is created from the opcode parameters; for the
    IALLOCATOR_DIR_IN direction its input text is returned, otherwise
    the allocator named in the opcode is run (without validation of its
    results) and its output text is returned.

    """
    op = self.op
    if op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=op.mode,
                       name=op.name,
                       mem_size=op.mem_size,
                       disks=op.disks,
                       disk_template=op.disk_template,
                       os=op.os,
                       tags=op.tags,
                       nics=op.nics,
                       vcpus=op.vcpus,
                       hypervisor=op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=op.mode,
                       name=op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if op.direction == constants.IALLOCATOR_DIR_IN:
      return ial.in_text

    ial.Run(op.allocator, validate=False)
    return ial.out_text