
root / lib / cmdlib.py @ 20777413


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
46
  """Logical Unit base class.
47

48
  Subclasses must follow these rules:
49
    - implement ExpandNames
50
    - implement CheckPrereq
51
    - implement Exec
52
    - implement BuildHooksEnv
53
    - redefine HPATH and HTYPE
54
    - optionally redefine their run requirements:
55
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
56

57
  Note that all commands require root permissions.
58

59
  @ivar dry_run_result: the value (if any) that will be returned to the caller
60
      in dry-run mode (signalled by opcode dry_run parameter)
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict(((i, 0) for i in locking.LEVELS))
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92
    # support for dry-run
93
    self.dry_run_result = None
94

    
95
    for attr_name in self._OP_REQP:
96
      attr_val = getattr(op, attr_name, None)
97
      if attr_val is None:
98
        raise errors.OpPrereqError("Required parameter '%s' missing" %
99
                                   attr_name)
100
    self.CheckArguments()
101

    
102
  def __GetSSH(self):
103
    """Returns the SshRunner object
104

105
    """
106
    if not self.__ssh:
107
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
108
    return self.__ssh
109

    
110
  ssh = property(fget=__GetSSH)
111

    
112
  def CheckArguments(self):
113
    """Check syntactic validity for the opcode arguments.
114

115
    This method is for doing a simple syntactic check and ensuring the
116
    validity of opcode parameters, without any cluster-related
117
    checks. While the same can be accomplished in ExpandNames and/or
118
    CheckPrereq, doing these separately is better because:
119

120
      - ExpandNames is left as purely a lock-related function
121
      - CheckPrereq is run after we have acquired locks (and possibly
122
        waited for them)
123

124
    The function is allowed to change the self.op attribute so that
125
    later methods no longer have to worry about missing parameters.
126

127
    """
128
    pass
129

    
130
  def ExpandNames(self):
131
    """Expand names for this LU.
132

133
    This method is called before starting to execute the opcode, and it should
134
    update all the parameters of the opcode to their canonical form (e.g. a
135
    short node name must be fully expanded after this method has successfully
136
    completed). This way locking, hooks, logging, etc. can work correctly.
137

138
    LUs which implement this method must also populate the self.needed_locks
139
    member, as a dict with lock levels as keys, and a list of needed lock names
140
    as values. Rules:
141

142
      - use an empty dict if you don't need any lock
143
      - if you don't need any lock at a particular level omit that level
144
      - don't put anything for the BGL level
145
      - if you want all locks at a level use locking.ALL_SET as a value
146

147
    If you need to share locks (rather than acquire them exclusively) at one
148
    level you can modify self.share_locks, setting a true value (usually 1) for
149
    that level. By default locks are not shared.
150

151
    Examples::
152

153
      # Acquire all nodes and one instance
154
      self.needed_locks = {
155
        locking.LEVEL_NODE: locking.ALL_SET,
156
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
157
      }
158
      # Acquire just two nodes
159
      self.needed_locks = {
160
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
161
      }
162
      # Acquire no locks
163
      self.needed_locks = {} # No, you can't leave it to the default value None
164

165
    """
166
    # The implementation of this method is mandatory only if the new LU is
167
    # concurrent, so that old LUs don't need to be changed all at the same
168
    # time.
169
    if self.REQ_BGL:
170
      self.needed_locks = {} # Exclusive LUs don't need locks.
171
    else:
172
      raise NotImplementedError
173

    
174
  def DeclareLocks(self, level):
175
    """Declare LU locking needs for a level
176

177
    While most LUs can just declare their locking needs at ExpandNames time,
178
    sometimes there's the need to calculate some locks after having acquired
179
    the ones before. This function is called just before acquiring locks at a
180
    particular level, but after acquiring the ones at lower levels, and permits
181
    such calculations. It can be used to modify self.needed_locks, and by
182
    default it does nothing.
183

184
    This function is only called if you have something already set in
185
    self.needed_locks for the level.
186

187
    @param level: Locking level which is going to be locked
188
    @type level: member of ganeti.locking.LEVELS
189

190
    """
191

    
192
  def CheckPrereq(self):
193
    """Check prerequisites for this LU.
194

195
    This method should check that the prerequisites for the execution
196
    of this LU are fulfilled. It can do internode communication, but
197
    it should be idempotent - no cluster or system changes are
198
    allowed.
199

200
    The method should raise errors.OpPrereqError in case something is
201
    not fulfilled. Its return value is ignored.
202

203
    This method should also update all the parameters of the opcode to
204
    their canonical form if it hasn't been done by ExpandNames before.
205

206
    """
207
    raise NotImplementedError
208

    
209
  def Exec(self, feedback_fn):
210
    """Execute the LU.
211

212
    This method should implement the actual work. It should raise
213
    errors.OpExecError for failures that are somewhat dealt with in
214
    code, or expected.
215

216
    """
217
    raise NotImplementedError
218

    
219
  def BuildHooksEnv(self):
220
    """Build hooks environment for this LU.
221

222
    This method should return a three-element tuple consisting of: a dict
223
    containing the environment that will be used for running the
224
    specific hook for this LU, a list of node names on which the hook
225
    should run before the execution, and a list of node names on which
226
    the hook should run after the execution.
227

228
    The keys of the dict must not be prefixed with 'GANETI_' as this will
229
    be handled in the hooks runner. Also note additional keys will be
230
    added by the hooks runner. If the LU doesn't define any
231
    environment, an empty dict (and not None) should be returned.
232

233
    If there are no nodes for a phase, an empty list (and not None) should be
    returned.
234

235
    Note that if the HPATH for a LU class is None, this function will
236
    not be called.
237

238
    """
239
    raise NotImplementedError
240

    
241
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
242
    """Notify the LU about the results of its hooks.
243

244
    This method is called every time a hooks phase is executed, and notifies
245
    the Logical Unit about the hooks' result. The LU can then use it to alter
246
    its result based on the hooks.  By default the method does nothing and the
247
    previous result is passed back unchanged but any LU can define it if it
248
    wants to use the local cluster hook-scripts somehow.
249

250
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
251
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
252
    @param hook_results: the results of the multi-node hooks rpc call
253
    @param feedback_fn: function used to send feedback back to the caller
254
    @param lu_result: the previous Exec result this LU had, or None
255
        in the PRE phase
256
    @return: the new Exec result, based on the previous result
257
        and hook results
258

259
    """
260
    return lu_result
261

    
262
  def _ExpandAndLockInstance(self):
263
    """Helper function to expand and lock an instance.
264

265
    Many LUs that work on an instance take its name in self.op.instance_name
266
    and need to expand it and then declare the expanded name for locking. This
267
    function does it, and then updates self.op.instance_name to the expanded
268
    name. It also initializes needed_locks as a dict, if this hasn't been done
269
    before.
270

271
    """
272
    if self.needed_locks is None:
273
      self.needed_locks = {}
274
    else:
275
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
276
        "_ExpandAndLockInstance called with instance-level locks set"
277
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
278
    if expanded_name is None:
279
      raise errors.OpPrereqError("Instance '%s' not known" %
280
                                  self.op.instance_name)
281
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
282
    self.op.instance_name = expanded_name
283

    
284
  def _LockInstancesNodes(self, primary_only=False):
285
    """Helper function to declare instances' nodes for locking.
286

287
    This function should be called after locking one or more instances to lock
288
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
289
    with all primary or secondary nodes for instances already locked and
290
    present in self.needed_locks[locking.LEVEL_INSTANCE].
291

292
    It should be called from DeclareLocks, and for safety only works if
293
    self.recalculate_locks[locking.LEVEL_NODE] is set.
294

295
    In the future it may grow parameters to just lock some instance's nodes, or
296
    to just lock primaries or secondary nodes, if needed.
297

298
    It should be called in DeclareLocks in a way similar to::
299

300
      if level == locking.LEVEL_NODE:
301
        self._LockInstancesNodes()
302

303
    @type primary_only: boolean
304
    @param primary_only: only lock primary nodes of locked instances
305

306
    """
307
    assert locking.LEVEL_NODE in self.recalculate_locks, \
308
      "_LockInstancesNodes helper function called with no nodes to recalculate"
309

    
310
    # TODO: check if we've really been called with the instance locks held
311

    
312
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
313
    # future we might want to have different behaviors depending on the value
314
    # of self.recalculate_locks[locking.LEVEL_NODE]
315
    wanted_nodes = []
316
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
317
      instance = self.context.cfg.GetInstanceInfo(instance_name)
318
      wanted_nodes.append(instance.primary_node)
319
      if not primary_only:
320
        wanted_nodes.extend(instance.secondary_nodes)
321

    
322
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
323
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
324
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
325
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
326

    
327
    del self.recalculate_locks[locking.LEVEL_NODE]
328

    
329

    
330
class NoHooksLU(LogicalUnit):
331
  """Simple LU which runs no hooks.
332

333
  This LU is intended as a parent for other LogicalUnits which will
334
  run no hooks, in order to reduce duplicate code.
335

336
  """
337
  HPATH = None
338
  HTYPE = None
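# Illustrative sketch: a minimal concurrent LU wired up according to the
# LogicalUnit contract documented above.  The opcode field 'instance_name' and
# the no-op Exec body are assumptions made purely for this example.
class _LUExampleInstanceNoOp(NoHooksLU):
  """Example LU: lock one instance plus its nodes, then just report.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # canonicalize self.op.instance_name and take the instance-level lock
    self._ExpandAndLockInstance()
    # node locks are computed later, once the instance lock is held
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

  def Exec(self, feedback_fn):
    feedback_fn("Would operate on instance %s (primary node %s)" %
                (self.instance.name, self.instance.primary_node))
    return self.instance.name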
339

    
340

    
341
def _GetWantedNodes(lu, nodes):
342
  """Returns list of checked and expanded node names.
343

344
  @type lu: L{LogicalUnit}
345
  @param lu: the logical unit on whose behalf we execute
346
  @type nodes: list
347
  @param nodes: list of node names or None for all nodes
348
  @rtype: list
349
  @return: the list of nodes, sorted
350
  @raise errors.OpPrereqError: if the nodes parameter is of the wrong type
351

352
  """
353
  if not isinstance(nodes, list):
354
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
355

    
356
  if not nodes:
357
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
358
      " non-empty list of nodes whose name is to be expanded.")
359

    
360
  wanted = []
361
  for name in nodes:
362
    node = lu.cfg.ExpandNodeName(name)
363
    if node is None:
364
      raise errors.OpPrereqError("No such node name '%s'" % name)
365
    wanted.append(node)
366

    
367
  return utils.NiceSort(wanted)
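# Example usage (node names are made up), e.g. from an LU's CheckPrereq:
#   self.op.nodes = _GetWantedNodes(self, ["node1", "node2.example.com"])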
368

    
369

    
370
def _GetWantedInstances(lu, instances):
371
  """Returns list of checked and expanded instance names.
372

373
  @type lu: L{LogicalUnit}
374
  @param lu: the logical unit on whose behalf we execute
375
  @type instances: list
376
  @param instances: list of instance names or None for all instances
377
  @rtype: list
378
  @return: the list of instances, sorted
379
  @raise errors.OpPrereqError: if the instances parameter is wrong type
380
  @raise errors.OpPrereqError: if any of the passed instances is not found
381

382
  """
383
  if not isinstance(instances, list):
384
    raise errors.OpPrereqError("Invalid argument type 'instances'")
385

    
386
  if instances:
387
    wanted = []
388

    
389
    for name in instances:
390
      instance = lu.cfg.ExpandInstanceName(name)
391
      if instance is None:
392
        raise errors.OpPrereqError("No such instance name '%s'" % name)
393
      wanted.append(instance)
394

    
395
  else:
396
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
397
  return wanted
398

    
399

    
400
def _CheckOutputFields(static, dynamic, selected):
401
  """Checks whether all selected fields are valid.
402

403
  @type static: L{utils.FieldSet}
404
  @param static: static fields set
405
  @type dynamic: L{utils.FieldSet}
406
  @param dynamic: dynamic fields set
407

408
  """
409
  f = utils.FieldSet()
410
  f.Extend(static)
411
  f.Extend(dynamic)
412

    
413
  delta = f.NonMatching(selected)
414
  if delta:
415
    raise errors.OpPrereqError("Unknown output fields selected: %s"
416
                               % ",".join(delta))
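# Example with made-up field sets: this raises OpPrereqError because "bogus"
# is in neither the static nor the dynamic set:
#   _CheckOutputFields(static=utils.FieldSet("name", "pinst_cnt"),
#                      dynamic=utils.FieldSet("free_memory"),
#                      selected=["name", "bogus"])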
417

    
418

    
419
def _CheckBooleanOpField(op, name):
420
  """Validates boolean opcode parameters.
421

422
  This will ensure that an opcode parameter is either a boolean value,
423
  or None (but that it always exists).
424

425
  """
426
  val = getattr(op, name, None)
427
  if not (val is None or isinstance(val, bool)):
428
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
429
                               (name, str(val)))
430
  setattr(op, name, val)
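# Typical use from an LU's CheckArguments (the "force" attribute is only an
# illustration): _CheckBooleanOpField(self.op, "force")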
431

    
432

    
433
def _CheckNodeOnline(lu, node):
434
  """Ensure that a given node is online.
435

436
  @param lu: the LU on behalf of which we make the check
437
  @param node: the node to check
438
  @raise errors.OpPrereqError: if the node is offline
439

440
  """
441
  if lu.cfg.GetNodeInfo(node).offline:
442
    raise errors.OpPrereqError("Can't use offline node %s" % node)
443

    
444

    
445
def _CheckNodeNotDrained(lu, node):
446
  """Ensure that a given node is not drained.
447

448
  @param lu: the LU on behalf of which we make the check
449
  @param node: the node to check
450
  @raise errors.OpPrereqError: if the node is drained
451

452
  """
453
  if lu.cfg.GetNodeInfo(node).drained:
454
    raise errors.OpPrereqError("Can't use drained node %s" % node)
455

    
456

    
457
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
458
                          memory, vcpus, nics, disk_template, disks,
459
                          bep, hvp, hypervisor):
460
  """Builds instance related env variables for hooks
461

462
  This builds the hook environment from individual variables.
463

464
  @type name: string
465
  @param name: the name of the instance
466
  @type primary_node: string
467
  @param primary_node: the name of the instance's primary node
468
  @type secondary_nodes: list
469
  @param secondary_nodes: list of secondary nodes as strings
470
  @type os_type: string
471
  @param os_type: the name of the instance's OS
472
  @type status: boolean
473
  @param status: the should_run status of the instance
474
  @type memory: string
475
  @param memory: the memory size of the instance
476
  @type vcpus: string
477
  @param vcpus: the count of VCPUs the instance has
478
  @type nics: list
479
  @param nics: list of tuples (ip, mac, mode, link) representing
480
      the NICs the instance has
481
  @type disk_template: string
482
  @param disk_template: the disk template of the instance
483
  @type disks: list
484
  @param disks: the list of (size, mode) pairs
485
  @type bep: dict
486
  @param bep: the backend parameters for the instance
487
  @type hvp: dict
488
  @param hvp: the hypervisor parameters for the instance
489
  @type hypervisor: string
490
  @param hypervisor: the hypervisor for the instance
491
  @rtype: dict
492
  @return: the hook environment for this instance
493

494
  """
495
  if status:
496
    str_status = "up"
497
  else:
498
    str_status = "down"
499
  env = {
500
    "OP_TARGET": name,
501
    "INSTANCE_NAME": name,
502
    "INSTANCE_PRIMARY": primary_node,
503
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
504
    "INSTANCE_OS_TYPE": os_type,
505
    "INSTANCE_STATUS": str_status,
506
    "INSTANCE_MEMORY": memory,
507
    "INSTANCE_VCPUS": vcpus,
508
    "INSTANCE_DISK_TEMPLATE": disk_template,
509
    "INSTANCE_HYPERVISOR": hypervisor,
510
  }
511

    
512
  if nics:
513
    nic_count = len(nics)
514
    for idx, (ip, mac, mode, link) in enumerate(nics):
515
      if ip is None:
516
        ip = ""
517
      env["INSTANCE_NIC%d_IP" % idx] = ip
518
      env["INSTANCE_NIC%d_MAC" % idx] = mac
519
      env["INSTANCE_NIC%d_MODE" % idx] = mode
520
      env["INSTANCE_NIC%d_LINK" % idx] = link
521
      if mode == constants.NIC_MODE_BRIDGED:
522
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
523
  else:
524
    nic_count = 0
525

    
526
  env["INSTANCE_NIC_COUNT"] = nic_count
527

    
528
  if disks:
529
    disk_count = len(disks)
530
    for idx, (size, mode) in enumerate(disks):
531
      env["INSTANCE_DISK%d_SIZE" % idx] = size
532
      env["INSTANCE_DISK%d_MODE" % idx] = mode
533
  else:
534
    disk_count = 0
535

    
536
  env["INSTANCE_DISK_COUNT"] = disk_count
537

    
538
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
539
    for key, value in source.items():
540
      env["INSTANCE_%s_%s" % (kind, key)] = value
541

    
542
  return env
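# For illustration only (all values made up), an instance with one bridged NIC
# and one disk yields an environment along the lines of:
#   INSTANCE_NAME=web1  INSTANCE_PRIMARY=node1  INSTANCE_STATUS=up
#   INSTANCE_NIC_COUNT=1  INSTANCE_NIC0_MAC=aa:00:00:11:22:33
#   INSTANCE_NIC0_MODE=bridged  INSTANCE_NIC0_LINK=xen-br0
#   INSTANCE_NIC0_BRIDGE=xen-br0
#   INSTANCE_DISK_COUNT=1  INSTANCE_DISK0_SIZE=10240  INSTANCE_DISK0_MODE=rw
# plus one INSTANCE_BE_* / INSTANCE_HV_* entry per backend/hypervisor parameter.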
543

    
544
def _NICListToTuple(lu, nics):
545
  """Build a list of nic information tuples.
546

547
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
548
  value in LUQueryInstanceData.
549

550
  @type lu:  L{LogicalUnit}
551
  @param lu: the logical unit on whose behalf we execute
552
  @type nics: list of L{objects.NIC}
553
  @param nics: list of nics to convert to hooks tuples
554

555
  """
556
  hooks_nics = []
557
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
558
  for nic in nics:
559
    ip = nic.ip
560
    mac = nic.mac
561
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
562
    mode = filled_params[constants.NIC_MODE]
563
    link = filled_params[constants.NIC_LINK]
564
    hooks_nics.append((ip, mac, mode, link))
565
  return hooks_nics
566

    
567
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
568
  """Builds instance related env variables for hooks from an object.
569

570
  @type lu: L{LogicalUnit}
571
  @param lu: the logical unit on whose behalf we execute
572
  @type instance: L{objects.Instance}
573
  @param instance: the instance for which we should build the
574
      environment
575
  @type override: dict
576
  @param override: dictionary with key/values that will override
577
      our values
578
  @rtype: dict
579
  @return: the hook environment dictionary
580

581
  """
582
  cluster = lu.cfg.GetClusterInfo()
583
  bep = cluster.FillBE(instance)
584
  hvp = cluster.FillHV(instance)
585
  args = {
586
    'name': instance.name,
587
    'primary_node': instance.primary_node,
588
    'secondary_nodes': instance.secondary_nodes,
589
    'os_type': instance.os,
590
    'status': instance.admin_up,
591
    'memory': bep[constants.BE_MEMORY],
592
    'vcpus': bep[constants.BE_VCPUS],
593
    'nics': _NICListToTuple(lu, instance.nics),
594
    'disk_template': instance.disk_template,
595
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
596
    'bep': bep,
597
    'hvp': hvp,
598
    'hypervisor': instance.hypervisor,
599
  }
600
  if override:
601
    args.update(override)
602
  return _BuildInstanceHookEnv(**args)
603

    
604

    
605
def _AdjustCandidatePool(lu):
606
  """Adjust the candidate pool after node operations.
607

608
  """
609
  mod_list = lu.cfg.MaintainCandidatePool()
610
  if mod_list:
611
    lu.LogInfo("Promoted nodes to master candidate role: %s",
612
               ", ".join(node.name for node in mod_list))
613
    for name in mod_list:
614
      lu.context.ReaddNode(name)
615
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
616
  if mc_now > mc_max:
617
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
618
               (mc_now, mc_max))
619

    
620

    
621
def _CheckNicsBridgesExist(lu, target_nics, target_node,
622
                               profile=constants.PP_DEFAULT):
623
  """Check that the bridges needed by a list of nics exist.
624

625
  """
626
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
627
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
628
                for nic in target_nics]
629
  brlist = [params[constants.NIC_LINK] for params in paramslist
630
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
631
  if brlist:
632
    result = lu.rpc.call_bridges_exist(target_node, brlist)
633
    result.Raise("Error checking bridges on destination node '%s'" %
634
                 target_node, prereq=True)
635

    
636

    
637
def _CheckInstanceBridgesExist(lu, instance, node=None):
638
  """Check that the bridges needed by an instance exist.
639

640
  """
641
  if node is None:
642
    node = instance.primary_node
643
  _CheckNicsBridgesExist(lu, instance.nics, node)
644

    
645

    
646
class LUDestroyCluster(NoHooksLU):
647
  """Logical unit for destroying the cluster.
648

649
  """
650
  _OP_REQP = []
651

    
652
  def CheckPrereq(self):
653
    """Check prerequisites.
654

655
    This checks whether the cluster is empty.
656

657
    Any errors are signalled by raising errors.OpPrereqError.
658

659
    """
660
    master = self.cfg.GetMasterNode()
661

    
662
    nodelist = self.cfg.GetNodeList()
663
    if len(nodelist) != 1 or nodelist[0] != master:
664
      raise errors.OpPrereqError("There are still %d node(s) in"
665
                                 " this cluster." % (len(nodelist) - 1))
666
    instancelist = self.cfg.GetInstanceList()
667
    if instancelist:
668
      raise errors.OpPrereqError("There are still %d instance(s) in"
669
                                 " this cluster." % len(instancelist))
670

    
671
  def Exec(self, feedback_fn):
672
    """Destroys the cluster.
673

674
    """
675
    master = self.cfg.GetMasterNode()
676
    result = self.rpc.call_node_stop_master(master, False)
677
    result.Raise("Could not disable the master role")
678
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
679
    utils.CreateBackup(priv_key)
680
    utils.CreateBackup(pub_key)
681
    return master
682

    
683

    
684
class LUVerifyCluster(LogicalUnit):
685
  """Verifies the cluster status.
686

687
  """
688
  HPATH = "cluster-verify"
689
  HTYPE = constants.HTYPE_CLUSTER
690
  _OP_REQP = ["skip_checks"]
691
  REQ_BGL = False
692

    
693
  def ExpandNames(self):
694
    self.needed_locks = {
695
      locking.LEVEL_NODE: locking.ALL_SET,
696
      locking.LEVEL_INSTANCE: locking.ALL_SET,
697
    }
698
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
699

    
700
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
701
                  node_result, feedback_fn, master_files,
702
                  drbd_map, vg_name):
703
    """Run multiple tests against a node.
704

705
    Test list:
706

707
      - compares ganeti version
708
      - checks vg existence and size > 20G
709
      - checks config file checksum
710
      - checks ssh to other nodes
711

712
    @type nodeinfo: L{objects.Node}
713
    @param nodeinfo: the node to check
714
    @param file_list: required list of files
715
    @param local_cksum: dictionary of local files and their checksums
716
    @param node_result: the results from the node
717
    @param feedback_fn: function used to accumulate results
718
    @param master_files: list of files that only masters should have
719
    @param drbd_map: the used DRBD minors for this node, in
720
        form of minor: (instance, must_exist) which correspond to instances
721
        and their running status
722
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
723

724
    """
725
    node = nodeinfo.name
726

    
727
    # main result, node_result should be a non-empty dict
728
    if not node_result or not isinstance(node_result, dict):
729
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
730
      return True
731

    
732
    # compares ganeti version
733
    local_version = constants.PROTOCOL_VERSION
734
    remote_version = node_result.get('version', None)
735
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
736
            len(remote_version) == 2):
737
      feedback_fn("  - ERROR: connection to %s failed" % (node))
738
      return True
739

    
740
    if local_version != remote_version[0]:
741
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
742
                  " node %s %s" % (local_version, node, remote_version[0]))
743
      return True
744

    
745
    # node seems compatible, we can actually try to look into its results
746

    
747
    bad = False
748

    
749
    # full package version
750
    if constants.RELEASE_VERSION != remote_version[1]:
751
      feedback_fn("  - WARNING: software version mismatch: master %s,"
752
                  " node %s %s" %
753
                  (constants.RELEASE_VERSION, node, remote_version[1]))
754

    
755
    # checks vg existence and size > 20G
756
    if vg_name is not None:
757
      vglist = node_result.get(constants.NV_VGLIST, None)
758
      if not vglist:
759
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
760
                        (node,))
761
        bad = True
762
      else:
763
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
764
                                              constants.MIN_VG_SIZE)
765
        if vgstatus:
766
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
767
          bad = True
768

    
769
    # checks config file checksum
770

    
771
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
772
    if not isinstance(remote_cksum, dict):
773
      bad = True
774
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
775
    else:
776
      for file_name in file_list:
777
        node_is_mc = nodeinfo.master_candidate
778
        must_have_file = file_name not in master_files
779
        if file_name not in remote_cksum:
780
          if node_is_mc or must_have_file:
781
            bad = True
782
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
783
        elif remote_cksum[file_name] != local_cksum[file_name]:
784
          if node_is_mc or must_have_file:
785
            bad = True
786
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
787
          else:
788
            # not candidate and this is not a must-have file
789
            bad = True
790
            feedback_fn("  - ERROR: non master-candidate has old/wrong file"
791
                        " '%s'" % file_name)
792
        else:
793
          # all good, except non-master/non-must have combination
794
          if not node_is_mc and not must_have_file:
795
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
796
                        " candidates" % file_name)
797

    
798
    # checks ssh to any
799

    
800
    if constants.NV_NODELIST not in node_result:
801
      bad = True
802
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
803
    else:
804
      if node_result[constants.NV_NODELIST]:
805
        bad = True
806
        for node in node_result[constants.NV_NODELIST]:
807
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
808
                          (node, node_result[constants.NV_NODELIST][node]))
809

    
810
    if constants.NV_NODENETTEST not in node_result:
811
      bad = True
812
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
813
    else:
814
      if node_result[constants.NV_NODENETTEST]:
815
        bad = True
816
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
817
        for node in nlist:
818
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
819
                          (node, node_result[constants.NV_NODENETTEST][node]))
820

    
821
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
822
    if isinstance(hyp_result, dict):
823
      for hv_name, hv_result in hyp_result.iteritems():
824
        if hv_result is not None:
825
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
826
                      (hv_name, hv_result))
827

    
828
    # check used drbd list
829
    if vg_name is not None:
830
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
831
      if not isinstance(used_minors, (tuple, list)):
832
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
833
                    str(used_minors))
834
      else:
835
        for minor, (iname, must_exist) in drbd_map.items():
836
          if minor not in used_minors and must_exist:
837
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
838
                        " not active" % (minor, iname))
839
            bad = True
840
        for minor in used_minors:
841
          if minor not in drbd_map:
842
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
843
                        minor)
844
            bad = True
845

    
846
    return bad
847

    
848
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
849
                      node_instance, feedback_fn, n_offline):
850
    """Verify an instance.
851

852
    This function checks to see if the required block devices are
853
    available on the instance's node.
854

855
    """
856
    bad = False
857

    
858
    node_current = instanceconfig.primary_node
859

    
860
    node_vol_should = {}
861
    instanceconfig.MapLVsByNode(node_vol_should)
862

    
863
    for node in node_vol_should:
864
      if node in n_offline:
865
        # ignore missing volumes on offline nodes
866
        continue
867
      for volume in node_vol_should[node]:
868
        if node not in node_vol_is or volume not in node_vol_is[node]:
869
          feedback_fn("  - ERROR: volume %s missing on node %s" %
870
                          (volume, node))
871
          bad = True
872

    
873
    if instanceconfig.admin_up:
874
      if ((node_current not in node_instance or
875
          not instance in node_instance[node_current]) and
876
          node_current not in n_offline):
877
        feedback_fn("  - ERROR: instance %s not running on node %s" %
878
                        (instance, node_current))
879
        bad = True
880

    
881
    for node in node_instance:
882
      if (not node == node_current):
883
        if instance in node_instance[node]:
884
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
885
                          (instance, node))
886
          bad = True
887

    
888
    return bad
889

    
890
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
891
    """Verify if there are any unknown volumes in the cluster.
892

893
    The .os, .swap and backup volumes are ignored. All other volumes are
894
    reported as unknown.
895

896
    """
897
    bad = False
898

    
899
    for node in node_vol_is:
900
      for volume in node_vol_is[node]:
901
        if node not in node_vol_should or volume not in node_vol_should[node]:
902
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
903
                      (volume, node))
904
          bad = True
905
    return bad
906

    
907
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
908
    """Verify the list of running instances.
909

910
    This checks what instances are running but unknown to the cluster.
911

912
    """
913
    bad = False
914
    for node in node_instance:
915
      for runninginstance in node_instance[node]:
916
        if runninginstance not in instancelist:
917
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
918
                          (runninginstance, node))
919
          bad = True
920
    return bad
921

    
922
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
923
    """Verify N+1 Memory Resilience.
924

925
    Check that if one single node dies we can still start all the instances it
926
    was primary for.
927

928
    """
929
    bad = False
930

    
931
    for node, nodeinfo in node_info.iteritems():
932
      # This code checks that every node which is now listed as secondary has
933
      # enough memory to host all instances it is supposed to, should a single
934
      # other node in the cluster fail.
935
      # FIXME: not ready for failover to an arbitrary node
936
      # FIXME: does not support file-backed instances
937
      # WARNING: we currently take into account down instances as well as up
938
      # ones, considering that even if they're down someone might want to start
939
      # them even in the event of a node failure.
940
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
941
        needed_mem = 0
942
        for instance in instances:
943
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
944
          if bep[constants.BE_AUTO_BALANCE]:
945
            needed_mem += bep[constants.BE_MEMORY]
946
        if nodeinfo['mfree'] < needed_mem:
947
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
948
                      " failovers should node %s fail" % (node, prinode))
949
          bad = True
950
    return bad
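  # Worked example with made-up numbers: if a node is secondary for two
  # auto-balanced instances whose primary is "nodeA", needing 512 and 1024 MB,
  # it must have at least 1536 MB of free memory; otherwise the failure of
  # "nodeA" could not be absorbed and the N+1 error above is reported.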
951

    
952
  def CheckPrereq(self):
953
    """Check prerequisites.
954

955
    Transform the list of checks we're going to skip into a set and check that
956
    all its members are valid.
957

958
    """
959
    self.skip_set = frozenset(self.op.skip_checks)
960
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
961
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
962

    
963
  def BuildHooksEnv(self):
964
    """Build hooks env.
965

966
    Cluster-Verify hooks just run in the post phase and their failure makes
967
    the output be logged in the verify output and the verification fail.
968

969
    """
970
    all_nodes = self.cfg.GetNodeList()
971
    env = {
972
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
973
      }
974
    for node in self.cfg.GetAllNodesInfo().values():
975
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
976

    
977
    return env, [], all_nodes
978

    
979
  def Exec(self, feedback_fn):
980
    """Verify integrity of cluster, performing various tests on nodes.
981

982
    """
983
    bad = False
984
    feedback_fn("* Verifying global settings")
985
    for msg in self.cfg.VerifyConfig():
986
      feedback_fn("  - ERROR: %s" % msg)
987

    
988
    vg_name = self.cfg.GetVGName()
989
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
990
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
991
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
992
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
993
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
994
                        for iname in instancelist)
995
    i_non_redundant = [] # Non redundant instances
996
    i_non_a_balanced = [] # Non auto-balanced instances
997
    n_offline = [] # List of offline nodes
998
    n_drained = [] # List of nodes being drained
999
    node_volume = {}
1000
    node_instance = {}
1001
    node_info = {}
1002
    instance_cfg = {}
1003

    
1004
    # FIXME: verify OS list
1005
    # do local checksums
1006
    master_files = [constants.CLUSTER_CONF_FILE]
1007

    
1008
    file_names = ssconf.SimpleStore().GetFileList()
1009
    file_names.append(constants.SSL_CERT_FILE)
1010
    file_names.append(constants.RAPI_CERT_FILE)
1011
    file_names.extend(master_files)
1012

    
1013
    local_checksums = utils.FingerprintFiles(file_names)
1014

    
1015
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1016
    node_verify_param = {
1017
      constants.NV_FILELIST: file_names,
1018
      constants.NV_NODELIST: [node.name for node in nodeinfo
1019
                              if not node.offline],
1020
      constants.NV_HYPERVISOR: hypervisors,
1021
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1022
                                  node.secondary_ip) for node in nodeinfo
1023
                                 if not node.offline],
1024
      constants.NV_INSTANCELIST: hypervisors,
1025
      constants.NV_VERSION: None,
1026
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1027
      }
1028
    if vg_name is not None:
1029
      node_verify_param[constants.NV_VGLIST] = None
1030
      node_verify_param[constants.NV_LVLIST] = vg_name
1031
      node_verify_param[constants.NV_DRBDLIST] = None
1032
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1033
                                           self.cfg.GetClusterName())
1034

    
1035
    cluster = self.cfg.GetClusterInfo()
1036
    master_node = self.cfg.GetMasterNode()
1037
    all_drbd_map = self.cfg.ComputeDRBDMap()
1038

    
1039
    for node_i in nodeinfo:
1040
      node = node_i.name
1041

    
1042
      if node_i.offline:
1043
        feedback_fn("* Skipping offline node %s" % (node,))
1044
        n_offline.append(node)
1045
        continue
1046

    
1047
      if node == master_node:
1048
        ntype = "master"
1049
      elif node_i.master_candidate:
1050
        ntype = "master candidate"
1051
      elif node_i.drained:
1052
        ntype = "drained"
1053
        n_drained.append(node)
1054
      else:
1055
        ntype = "regular"
1056
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1057

    
1058
      msg = all_nvinfo[node].fail_msg
1059
      if msg:
1060
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1061
        bad = True
1062
        continue
1063

    
1064
      nresult = all_nvinfo[node].payload
1065
      node_drbd = {}
1066
      for minor, instance in all_drbd_map[node].items():
1067
        if instance not in instanceinfo:
1068
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1069
                      instance)
1070
          # ghost instance should not be running, but otherwise we
1071
          # don't give double warnings (both ghost instance and
1072
          # unallocated minor in use)
1073
          node_drbd[minor] = (instance, False)
1074
        else:
1075
          instance = instanceinfo[instance]
1076
          node_drbd[minor] = (instance.name, instance.admin_up)
1077
      result = self._VerifyNode(node_i, file_names, local_checksums,
1078
                                nresult, feedback_fn, master_files,
1079
                                node_drbd, vg_name)
1080
      bad = bad or result
1081

    
1082
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1083
      if vg_name is None:
1084
        node_volume[node] = {}
1085
      elif isinstance(lvdata, basestring):
1086
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1087
                    (node, utils.SafeEncode(lvdata)))
1088
        bad = True
1089
        node_volume[node] = {}
1090
      elif not isinstance(lvdata, dict):
1091
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1092
        bad = True
1093
        continue
1094
      else:
1095
        node_volume[node] = lvdata
1096

    
1097
      # node_instance
1098
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1099
      if not isinstance(idata, list):
1100
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1101
                    (node,))
1102
        bad = True
1103
        continue
1104

    
1105
      node_instance[node] = idata
1106

    
1107
      # node_info
1108
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1109
      if not isinstance(nodeinfo, dict):
1110
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1111
        bad = True
1112
        continue
1113

    
1114
      try:
1115
        node_info[node] = {
1116
          "mfree": int(nodeinfo['memory_free']),
1117
          "pinst": [],
1118
          "sinst": [],
1119
          # dictionary holding all instances this node is secondary for,
1120
          # grouped by their primary node. Each key is a cluster node, and each
1121
          # value is a list of instances which have the key as primary and the
1122
          # current node as secondary.  This is handy to calculate N+1 memory
1123
          # availability if you can only failover from a primary to its
1124
          # secondary.
1125
          "sinst-by-pnode": {},
1126
        }
1127
        # FIXME: devise a free space model for file based instances as well
1128
        if vg_name is not None:
1129
          if (constants.NV_VGLIST not in nresult or
1130
              vg_name not in nresult[constants.NV_VGLIST]):
1131
            feedback_fn("  - ERROR: node %s didn't return data for the"
1132
                        " volume group '%s' - it is either missing or broken" %
1133
                        (node, vg_name))
1134
            bad = True
1135
            continue
1136
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1137
      except (ValueError, KeyError):
1138
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1139
                    " from node %s" % (node,))
1140
        bad = True
1141
        continue
1142

    
1143
    node_vol_should = {}
1144

    
1145
    for instance in instancelist:
1146
      feedback_fn("* Verifying instance %s" % instance)
1147
      inst_config = instanceinfo[instance]
1148
      result = self._VerifyInstance(instance, inst_config, node_volume,
1149
                                     node_instance, feedback_fn, n_offline)
1150
      bad = bad or result
1151
      inst_nodes_offline = []
1152

    
1153
      inst_config.MapLVsByNode(node_vol_should)
1154

    
1155
      instance_cfg[instance] = inst_config
1156

    
1157
      pnode = inst_config.primary_node
1158
      if pnode in node_info:
1159
        node_info[pnode]['pinst'].append(instance)
1160
      elif pnode not in n_offline:
1161
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1162
                    " %s failed" % (instance, pnode))
1163
        bad = True
1164

    
1165
      if pnode in n_offline:
1166
        inst_nodes_offline.append(pnode)
1167

    
1168
      # If the instance is non-redundant we cannot survive losing its primary
1169
      # node, so we are not N+1 compliant. On the other hand we have no disk
1170
      # templates with more than one secondary so that situation is not well
1171
      # supported either.
1172
      # FIXME: does not support file-backed instances
1173
      if len(inst_config.secondary_nodes) == 0:
1174
        i_non_redundant.append(instance)
1175
      elif len(inst_config.secondary_nodes) > 1:
1176
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1177
                    % instance)
1178

    
1179
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1180
        i_non_a_balanced.append(instance)
1181

    
1182
      for snode in inst_config.secondary_nodes:
1183
        if snode in node_info:
1184
          node_info[snode]['sinst'].append(instance)
1185
          if pnode not in node_info[snode]['sinst-by-pnode']:
1186
            node_info[snode]['sinst-by-pnode'][pnode] = []
1187
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1188
        elif snode not in n_offline:
1189
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1190
                      " %s failed" % (instance, snode))
1191
          bad = True
1192
        if snode in n_offline:
1193
          inst_nodes_offline.append(snode)
1194

    
1195
      if inst_nodes_offline:
1196
        # warn that the instance lives on offline nodes, and set bad=True
1197
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1198
                    ", ".join(inst_nodes_offline))
1199
        bad = True
1200

    
1201
    feedback_fn("* Verifying orphan volumes")
1202
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1203
                                       feedback_fn)
1204
    bad = bad or result
1205

    
1206
    feedback_fn("* Verifying remaining instances")
1207
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1208
                                         feedback_fn)
1209
    bad = bad or result
1210

    
1211
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1212
      feedback_fn("* Verifying N+1 Memory redundancy")
1213
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1214
      bad = bad or result
1215

    
1216
    feedback_fn("* Other Notes")
1217
    if i_non_redundant:
1218
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1219
                  % len(i_non_redundant))
1220

    
1221
    if i_non_a_balanced:
1222
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1223
                  % len(i_non_a_balanced))
1224

    
1225
    if n_offline:
1226
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1227

    
1228
    if n_drained:
1229
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1230

    
1231
    return not bad
1232

    
1233
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1234
    """Analyze the post-hooks' result
1235

1236
    This method analyses the hook result, handles it, and sends some
1237
    nicely-formatted feedback back to the user.
1238

1239
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1240
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1241
    @param hooks_results: the results of the multi-node hooks rpc call
1242
    @param feedback_fn: function used to send feedback back to the caller
1243
    @param lu_result: previous Exec result
1244
    @return: the new Exec result, based on the previous result
1245
        and hook results
1246

1247
    """
1248
    # We only really run POST phase hooks, and are only interested in
1249
    # their results
1250
    if phase == constants.HOOKS_PHASE_POST:
1251
      # Used to change hooks' output to proper indentation
1252
      indent_re = re.compile('^', re.M)
1253
      feedback_fn("* Hooks Results")
1254
      if not hooks_results:
1255
        feedback_fn("  - ERROR: general communication failure")
1256
        lu_result = 1
1257
      else:
1258
        for node_name in hooks_results:
1259
          show_node_header = True
1260
          res = hooks_results[node_name]
1261
          msg = res.fail_msg
1262
          if msg:
1263
            if res.offline:
1264
              # no need to warn or set fail return value
1265
              continue
1266
            feedback_fn("    Communication failure in hooks execution: %s" %
1267
                        msg)
1268
            lu_result = 1
1269
            continue
1270
          for script, hkr, output in res.payload:
1271
            if hkr == constants.HKR_FAIL:
1272
              # The node header is only shown once, if there are
1273
              # failing hooks on that node
1274
              if show_node_header:
1275
                feedback_fn("  Node %s:" % node_name)
1276
                show_node_header = False
1277
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1278
              output = indent_re.sub('      ', output)
1279
              feedback_fn("%s" % output)
1280
              lu_result = 1
1281

    
1282
      return lu_result
1283

    
1284

    
1285
class LUVerifyDisks(NoHooksLU):
1286
  """Verifies the cluster disks status.
1287

1288
  """
1289
  _OP_REQP = []
1290
  REQ_BGL = False
1291

    
1292
  def ExpandNames(self):
1293
    self.needed_locks = {
1294
      locking.LEVEL_NODE: locking.ALL_SET,
1295
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1296
    }
1297
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1298

    
1299
  def CheckPrereq(self):
1300
    """Check prerequisites.
1301

1302
    This has no prerequisites.
1303

1304
    """
1305
    pass
1306

    
1307
  def Exec(self, feedback_fn):
1308
    """Verify integrity of cluster disks.
1309

1310
    @rtype: tuple of three items
1311
    @return: a tuple of (dict of node-to-node_error, list of instances
1312
        which need activate-disks, dict of instance: (node, volume) for
1313
        missing volumes)
1314

1315
    """
1316
    result = res_nodes, res_instances, res_missing = {}, [], {}
1317

    
1318
    vg_name = self.cfg.GetVGName()
1319
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1320
    instances = [self.cfg.GetInstanceInfo(name)
1321
                 for name in self.cfg.GetInstanceList()]
1322

    
1323
    nv_dict = {}
1324
    for inst in instances:
1325
      inst_lvs = {}
1326
      if (not inst.admin_up or
1327
          inst.disk_template not in constants.DTS_NET_MIRROR):
1328
        continue
1329
      inst.MapLVsByNode(inst_lvs)
1330
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1331
      for node, vol_list in inst_lvs.iteritems():
1332
        for vol in vol_list:
1333
          nv_dict[(node, vol)] = inst
1334

    
1335
    if not nv_dict:
1336
      return result
1337

    
1338
    node_lvs = self.rpc.call_volume_list(nodes, vg_name)
1339

    
1340
    to_act = set()
1341
    for node in nodes:
1342
      # node_volume
1343
      node_res = node_lvs[node]
1344
      if node_res.offline:
1345
        continue
1346
      msg = node_res.fail_msg
1347
      if msg:
1348
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1349
        res_nodes[node] = msg
1350
        continue
1351

    
1352
      lvs = node_res.payload
1353
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1354
        inst = nv_dict.pop((node, lv_name), None)
1355
        if (not lv_online and inst is not None
1356
            and inst.name not in res_instances):
1357
          res_instances.append(inst.name)
1358

    
1359
    # any leftover items in nv_dict are missing LVs, let's arrange the
1360
    # data better
1361
    for key, inst in nv_dict.iteritems():
1362
      if inst.name not in res_missing:
1363
        res_missing[inst.name] = []
1364
      res_missing[inst.name].append(key)
1365

    
1366
    return result
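  # Example result shape (names made up): a node whose RPC failed, an instance
  # needing activate-disks, and an instance with a volume missing on one node:
  #   ({"node3": "Connection failed"}, ["instance1"],
  #    {"instance2": [("node2", "disk0_data")]})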
1367

    
1368

    
1369
class LURenameCluster(LogicalUnit):
1370
  """Rename the cluster.
1371

1372
  """
1373
  HPATH = "cluster-rename"
1374
  HTYPE = constants.HTYPE_CLUSTER
1375
  _OP_REQP = ["name"]
1376

    
1377
  def BuildHooksEnv(self):
1378
    """Build hooks env.
1379

1380
    """
1381
    env = {
1382
      "OP_TARGET": self.cfg.GetClusterName(),
1383
      "NEW_NAME": self.op.name,
1384
      }
1385
    mn = self.cfg.GetMasterNode()
1386
    return env, [mn], [mn]
1387

    
1388
  def CheckPrereq(self):
1389
    """Verify that the passed name is a valid one.
1390

1391
    """
1392
    hostname = utils.HostInfo(self.op.name)
1393

    
1394
    new_name = hostname.name
1395
    self.ip = new_ip = hostname.ip
1396
    old_name = self.cfg.GetClusterName()
1397
    old_ip = self.cfg.GetMasterIP()
1398
    if new_name == old_name and new_ip == old_ip:
1399
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1400
                                 " cluster has changed")
1401
    if new_ip != old_ip:
1402
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1403
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1404
                                   " reachable on the network. Aborting." %
1405
                                   new_ip)
1406

    
1407
    self.op.name = new_name
1408

    
1409
  def Exec(self, feedback_fn):
1410
    """Rename the cluster.
1411

1412
    """
1413
    clustername = self.op.name
1414
    ip = self.ip
1415

    
1416
    # shutdown the master IP
1417
    master = self.cfg.GetMasterNode()
1418
    result = self.rpc.call_node_stop_master(master, False)
1419
    result.Raise("Could not disable the master role")
1420

    
1421
    try:
1422
      cluster = self.cfg.GetClusterInfo()
1423
      cluster.cluster_name = clustername
1424
      cluster.master_ip = ip
1425
      self.cfg.Update(cluster)
1426

    
1427
      # update the known hosts file
1428
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1429
      node_list = self.cfg.GetNodeList()
1430
      try:
1431
        node_list.remove(master)
1432
      except ValueError:
1433
        pass
1434
      result = self.rpc.call_upload_file(node_list,
1435
                                         constants.SSH_KNOWN_HOSTS_FILE)
1436
      for to_node, to_result in result.iteritems():
1437
        msg = to_result.fail_msg
1438
        if msg:
1439
          msg = ("Copy of file %s to node %s failed: %s" %
1440
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1441
          self.proc.LogWarning(msg)
1442

    
1443
    finally:
1444
      result = self.rpc.call_node_start_master(master, False)
1445
      msg = result.fail_msg
1446
      if msg:
1447
        self.LogWarning("Could not re-enable the master role on"
1448
                        " the master, please restart manually: %s", msg)
1449

    
1450

    
1451
def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether an LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
1465

    
1466

    
1467
class LUSetClusterParams(LogicalUnit):
1468
  """Change the parameters of the cluster.
1469

1470
  """
1471
  HPATH = "cluster-modify"
1472
  HTYPE = constants.HTYPE_CLUSTER
1473
  _OP_REQP = []
1474
  REQ_BGL = False
1475

    
1476
  def CheckArguments(self):
1477
    """Check parameters
1478

1479
    """
1480
    if not hasattr(self.op, "candidate_pool_size"):
1481
      self.op.candidate_pool_size = None
1482
    if self.op.candidate_pool_size is not None:
1483
      try:
1484
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1485
      except (ValueError, TypeError), err:
1486
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1487
                                   str(err))
1488
      if self.op.candidate_pool_size < 1:
1489
        raise errors.OpPrereqError("At least one master candidate needed")
1490

    
1491
  def ExpandNames(self):
1492
    # FIXME: in the future maybe other cluster params won't require checking on
1493
    # all nodes to be modified.
1494
    self.needed_locks = {
1495
      locking.LEVEL_NODE: locking.ALL_SET,
1496
    }
1497
    self.share_locks[locking.LEVEL_NODE] = 1
1498

    
1499
  def BuildHooksEnv(self):
1500
    """Build hooks env.
1501

1502
    """
1503
    env = {
1504
      "OP_TARGET": self.cfg.GetClusterName(),
1505
      "NEW_VG_NAME": self.op.vg_name,
1506
      }
1507
    mn = self.cfg.GetMasterNode()
1508
    return env, [mn], [mn]
1509

    
1510
  def CheckPrereq(self):
1511
    """Check prerequisites.
1512

1513
    This checks whether the given params don't conflict and
1514
    if the given volume group is valid.
1515

1516
    """
1517
    if self.op.vg_name is not None and not self.op.vg_name:
1518
      instances = self.cfg.GetAllInstancesInfo().values()
1519
      for inst in instances:
1520
        for disk in inst.disks:
1521
          if _RecursiveCheckIfLVMBased(disk):
1522
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1523
                                       " lvm-based instances exist")
1524

    
1525
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1526

    
1527
    # if vg_name not None, checks given volume group on all nodes
1528
    if self.op.vg_name:
1529
      vglist = self.rpc.call_vg_list(node_list)
1530
      for node in node_list:
1531
        msg = vglist[node].fail_msg
1532
        if msg:
1533
          # ignoring down node
1534
          self.LogWarning("Error while gathering data on node %s"
1535
                          " (ignoring node): %s", node, msg)
1536
          continue
1537
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1538
                                              self.op.vg_name,
1539
                                              constants.MIN_VG_SIZE)
1540
        if vgstatus:
1541
          raise errors.OpPrereqError("Error on node '%s': %s" %
1542
                                     (node, vgstatus))
1543

    
1544
    self.cluster = cluster = self.cfg.GetClusterInfo()
1545
    # validate params changes
1546
    if self.op.beparams:
1547
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1548
      self.new_beparams = objects.FillDict(
1549
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1550

    
1551
    if self.op.nicparams:
1552
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1553
      self.new_nicparams = objects.FillDict(
1554
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1555
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1556

    
1557
    # hypervisor list/parameters
1558
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1559
    if self.op.hvparams:
1560
      if not isinstance(self.op.hvparams, dict):
1561
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1562
      for hv_name, hv_dict in self.op.hvparams.items():
1563
        if hv_name not in self.new_hvparams:
1564
          self.new_hvparams[hv_name] = hv_dict
1565
        else:
1566
          self.new_hvparams[hv_name].update(hv_dict)
1567

    
1568
    if self.op.enabled_hypervisors is not None:
1569
      self.hv_list = self.op.enabled_hypervisors
1570
    else:
1571
      self.hv_list = cluster.enabled_hypervisors
1572

    
1573
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1574
      # either the enabled list has changed, or the parameters have, validate
1575
      for hv_name, hv_params in self.new_hvparams.items():
1576
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1577
            (self.op.enabled_hypervisors and
1578
             hv_name in self.op.enabled_hypervisors)):
1579
          # either this is a new hypervisor, or its parameters have changed
1580
          hv_class = hypervisor.GetHypervisor(hv_name)
1581
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1582
          hv_class.CheckParameterSyntax(hv_params)
1583
          _CheckHVParams(self, node_list, hv_name, hv_params)
1584

    
1585
  def Exec(self, feedback_fn):
1586
    """Change the parameters of the cluster.
1587

1588
    """
1589
    if self.op.vg_name is not None:
1590
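      # an empty vg_name means "disable LVM storage" and is stored as None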
      new_volume = self.op.vg_name
1591
      if not new_volume:
1592
        new_volume = None
1593
      if new_volume != self.cfg.GetVGName():
1594
        self.cfg.SetVGName(new_volume)
1595
      else:
1596
        feedback_fn("Cluster LVM configuration already in desired"
1597
                    " state, not changing")
1598
    if self.op.hvparams:
1599
      self.cluster.hvparams = self.new_hvparams
1600
    if self.op.enabled_hypervisors is not None:
1601
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1602
    if self.op.beparams:
1603
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1604
    if self.op.nicparams:
1605
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1606

    
1607
    if self.op.candidate_pool_size is not None:
1608
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1609

    
1610
    self.cfg.Update(self.cluster)
1611

    
1612
    # we want to update nodes after the cluster so that if any errors
1613
    # happen, we have recorded and saved the cluster info
1614
    if self.op.candidate_pool_size is not None:
1615
      _AdjustCandidatePool(self)
1616

    
1617

    
1618
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1619
  """Distribute additional files which are part of the cluster configuration.
1620

1621
  ConfigWriter takes care of distributing the config and ssconf files, but
1622
  there are more files which should be distributed to all nodes. This function
1623
  makes sure those are copied.
1624

1625
  @param lu: calling logical unit
1626
  @param additional_nodes: list of nodes not in the config to distribute to
1627

1628
  """
1629
  # 1. Gather target nodes
1630
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1631
  dist_nodes = lu.cfg.GetNodeList()
1632
  if additional_nodes is not None:
1633
    dist_nodes.extend(additional_nodes)
1634
  if myself.name in dist_nodes:
1635
    dist_nodes.remove(myself.name)
1636
  # 2. Gather files to distribute
1637
  dist_files = set([constants.ETC_HOSTS,
1638
                    constants.SSH_KNOWN_HOSTS_FILE,
1639
                    constants.RAPI_CERT_FILE,
1640
                    constants.RAPI_USERS_FILE,
1641
                   ])
1642

    
1643
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1644
  for hv_name in enabled_hypervisors:
1645
    hv_class = hypervisor.GetHypervisor(hv_name)
1646
    dist_files.update(hv_class.GetAncillaryFiles())
1647

    
1648
  # 3. Perform the files upload
1649
  for fname in dist_files:
1650
    if os.path.exists(fname):
1651
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1652
      for to_node, to_result in result.items():
1653
        msg = to_result.fail_msg
1654
        if msg:
1655
          msg = ("Copy of file %s to node %s failed: %s" %
1656
                 (fname, to_node, msg))
1657
          lu.proc.LogWarning(msg)
1658

    
1659

    
1660
class LURedistributeConfig(NoHooksLU):
1661
  """Force the redistribution of cluster configuration.
1662

1663
  This is a very simple LU.
1664

1665
  """
1666
  _OP_REQP = []
1667
  REQ_BGL = False
1668

    
1669
  def ExpandNames(self):
1670
    self.needed_locks = {
1671
      locking.LEVEL_NODE: locking.ALL_SET,
1672
    }
1673
    self.share_locks[locking.LEVEL_NODE] = 1
1674

    
1675
  def CheckPrereq(self):
1676
    """Check prerequisites.
1677

1678
    """
1679

    
1680
  def Exec(self, feedback_fn):
1681
    """Redistribute the configuration.
1682

1683
    """
1684
    self.cfg.Update(self.cfg.GetClusterInfo())
1685
    _RedistributeAncillaryFiles(self)
1686

    
1687

    
1688
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1689
  """Sleep and poll for an instance's disk to sync.
1690

1691
  """
1692
  if not instance.disks:
1693
    return True
1694

    
1695
  if not oneshot:
1696
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1697

    
1698
  node = instance.primary_node
1699

    
1700
  for dev in instance.disks:
1701
    lu.cfg.SetDiskID(dev, node)
1702

    
1703
  retries = 0
1704
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1705
  while True:
1706
    max_time = 0
1707
    done = True
1708
    cumul_degraded = False
1709
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1710
    msg = rstats.fail_msg
1711
    if msg:
1712
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1713
      retries += 1
1714
      if retries >= 10:
1715
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1716
                                 " aborting." % node)
1717
      time.sleep(6)
1718
      continue
1719
    rstats = rstats.payload
1720
    retries = 0
1721
    for i, mstat in enumerate(rstats):
1722
      if mstat is None:
1723
        lu.LogWarning("Can't compute data for node %s/%s",
1724
                           node, instance.disks[i].iv_name)
1725
        continue
1726
      # we ignore the ldisk parameter
1727
      perc_done, est_time, is_degraded, _ = mstat
1728
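      # a device counts as degraded only if it is degraded while no
      # sync progress is reported (perc_done is None)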
      cumul_degraded = cumul_degraded or (is_degraded and perc_done is None)
1729
      if perc_done is not None:
1730
        done = False
1731
        if est_time is not None:
1732
          rem_time = "%d estimated seconds remaining" % est_time
1733
          max_time = est_time
1734
        else:
1735
          rem_time = "no time estimate"
1736
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1737
                        (instance.disks[i].iv_name, perc_done, rem_time))
1738

    
1739
    # if we're done but degraded, let's do a few small retries, to
1740
    # make sure we see a stable and not transient situation; therefore
1741
    # we force restart of the loop
1742
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1743
      logging.info("Degraded disks found, %d retries left", degr_retries)
1744
      degr_retries -= 1
1745
      time.sleep(1)
1746
      continue
1747

    
1748
    if done or oneshot:
1749
      break
1750

    
1751
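    # wait roughly as long as the slowest device estimates, but no more
    # than 60 seconds between polls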
    time.sleep(min(60, max_time))
1752

    
1753
  if done:
1754
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
1755
  return not cumul_degraded
1756

    
1757

    
1758
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
1759
  """Check that mirrors are not degraded.
1760

1761
  The ldisk parameter, if True, will change the test from the
1762
  is_degraded attribute (which represents overall non-ok status for
1763
  the device(s)) to the ldisk (representing the local storage status).
1764

1765
  """
1766
  lu.cfg.SetDiskID(dev, node)
1767
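  # select which field of the blockdev_find status tuple to test: the
  # overall is_degraded flag or the local disk (ldisk) status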
  if ldisk:
1768
    idx = 6
1769
  else:
1770
    idx = 5
1771

    
1772
  result = True
1773
  if on_primary or dev.AssembleOnSecondary():
1774
    rstats = lu.rpc.call_blockdev_find(node, dev)
1775
    msg = rstats.fail_msg
1776
    if msg:
1777
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
1778
      result = False
1779
    elif not rstats.payload:
1780
      lu.LogWarning("Can't find disk on node %s", node)
1781
      result = False
1782
    else:
1783
      result = result and (not rstats.payload[idx])
1784
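  # child devices are always checked with the default is_degraded test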
  if dev.children:
1785
    for child in dev.children:
1786
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
1787

    
1788
  return result
1789

    
1790

    
1791
class LUDiagnoseOS(NoHooksLU):
1792
  """Logical unit for OS diagnose/query.
1793

1794
  """
1795
  _OP_REQP = ["output_fields", "names"]
1796
  REQ_BGL = False
1797
  _FIELDS_STATIC = utils.FieldSet()
1798
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
1799

    
1800
  def ExpandNames(self):
1801
    if self.op.names:
1802
      raise errors.OpPrereqError("Selective OS query not supported")
1803

    
1804
    _CheckOutputFields(static=self._FIELDS_STATIC,
1805
                       dynamic=self._FIELDS_DYNAMIC,
1806
                       selected=self.op.output_fields)
1807

    
1808
    # Lock all nodes, in shared mode
1809
    # Temporary removal of locks, should be reverted later
1810
    # TODO: reintroduce locks when they are lighter-weight
1811
    self.needed_locks = {}
1812
    #self.share_locks[locking.LEVEL_NODE] = 1
1813
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
1814

    
1815
  def CheckPrereq(self):
1816
    """Check prerequisites.
1817

1818
    """
1819

    
1820
  @staticmethod
1821
  def _DiagnoseByOS(node_list, rlist):
1822
    """Remaps a per-node return list into an a per-os per-node dictionary
1823

1824
    @param node_list: a list with the names of all nodes
1825
    @param rlist: a map with node names as keys and OS objects as values
1826

1827
    @rtype: dict
1828
    @return: a dictionary with osnames as keys and as value another map, with
1829
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
1830

1831
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
1832
                                     (/srv/..., False, "invalid api")],
1833
                           "node2": [(/srv/..., True, "")]}
1834
          }
1835

1836
    """
1837
    all_os = {}
1838
    # we build here the list of nodes that didn't fail the RPC (at RPC
1839
    # level), so that nodes with a non-responding node daemon don't
1840
    # make all OSes invalid
1841
    good_nodes = [node_name for node_name in rlist
1842
                  if not rlist[node_name].fail_msg]
1843
    for node_name, nr in rlist.items():
1844
      if nr.fail_msg or not nr.payload:
1845
        continue
1846
      for name, path, status, diagnose in nr.payload:
1847
        if name not in all_os:
1848
          # build a list of nodes for this os containing empty lists
1849
          # for each node in node_list
1850
          all_os[name] = {}
1851
          for nname in good_nodes:
1852
            all_os[name][nname] = []
1853
        all_os[name][node_name].append((path, status, diagnose))
1854
    return all_os
1855

    
1856
  def Exec(self, feedback_fn):
1857
    """Compute the list of OSes.
1858

1859
    """
1860
    valid_nodes = self.cfg.GetOnlineNodeList()
1861
    node_data = self.rpc.call_os_diagnose(valid_nodes)
1862
    pol = self._DiagnoseByOS(valid_nodes, node_data)
1863
    output = []
1864
    for os_name, os_data in pol.items():
1865
      row = []
1866
      for field in self.op.output_fields:
1867
        if field == "name":
1868
          val = os_name
1869
        elif field == "valid":
1870
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
1871
        elif field == "node_status":
1872
          # this is just a copy of the dict
1873
          val = {}
1874
          for node_name, nos_list in os_data.items():
1875
            val[node_name] = nos_list
1876
        else:
1877
          raise errors.ParameterError(field)
1878
        row.append(val)
1879
      output.append(row)
1880

    
1881
    return output
1882

    
1883

    
1884
class LURemoveNode(LogicalUnit):
1885
  """Logical unit for removing a node.
1886

1887
  """
1888
  HPATH = "node-remove"
1889
  HTYPE = constants.HTYPE_NODE
1890
  _OP_REQP = ["node_name"]
1891

    
1892
  def BuildHooksEnv(self):
1893
    """Build hooks env.
1894

1895
    This doesn't run on the target node in the pre phase as a failed
1896
    node would then be impossible to remove.
1897

1898
    """
1899
    env = {
1900
      "OP_TARGET": self.op.node_name,
1901
      "NODE_NAME": self.op.node_name,
1902
      }
1903
    all_nodes = self.cfg.GetNodeList()
1904
    all_nodes.remove(self.op.node_name)
1905
    return env, all_nodes, all_nodes
1906

    
1907
  def CheckPrereq(self):
1908
    """Check prerequisites.
1909

1910
    This checks:
1911
     - the node exists in the configuration
1912
     - it does not have primary or secondary instances
1913
     - it's not the master
1914

1915
    Any errors are signalled by raising errors.OpPrereqError.
1916

1917
    """
1918
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
1919
    if node is None:
1920
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
1921

    
1922
    instance_list = self.cfg.GetInstanceList()
1923

    
1924
    masternode = self.cfg.GetMasterNode()
1925
    if node.name == masternode:
1926
      raise errors.OpPrereqError("Node is the master node,"
1927
                                 " you need to failover first.")
1928

    
1929
    for instance_name in instance_list:
1930
      instance = self.cfg.GetInstanceInfo(instance_name)
1931
      if node.name in instance.all_nodes:
1932
        raise errors.OpPrereqError("Instance %s is still running on the node,"
1933
                                   " please remove first." % instance_name)
1934
    self.op.node_name = node.name
1935
    self.node = node
1936

    
1937
  def Exec(self, feedback_fn):
1938
    """Removes the node from the cluster.
1939

1940
    """
1941
    node = self.node
1942
    logging.info("Stopping the node daemon and removing configs from node %s",
1943
                 node.name)
1944

    
1945
    self.context.RemoveNode(node.name)
1946

    
1947
    result = self.rpc.call_node_leave_cluster(node.name)
1948
    msg = result.fail_msg
1949
    if msg:
1950
      self.LogWarning("Errors encountered on the remote node while leaving"
1951
                      " the cluster: %s", msg)
1952

    
1953
    # Promote nodes to master candidate as needed
1954
    _AdjustCandidatePool(self)
1955

    
1956

    
1957
class LUQueryNodes(NoHooksLU):
1958
  """Logical unit for querying nodes.
1959

1960
  """
1961
  _OP_REQP = ["output_fields", "names", "use_locking"]
1962
  REQ_BGL = False
1963
  _FIELDS_DYNAMIC = utils.FieldSet(
1964
    "dtotal", "dfree",
1965
    "mtotal", "mnode", "mfree",
1966
    "bootid",
1967
    "ctotal", "cnodes", "csockets",
1968
    )
1969

    
1970
  _FIELDS_STATIC = utils.FieldSet(
1971
    "name", "pinst_cnt", "sinst_cnt",
1972
    "pinst_list", "sinst_list",
1973
    "pip", "sip", "tags",
1974
    "serial_no",
1975
    "master_candidate",
1976
    "master",
1977
    "offline",
1978
    "drained",
1979
    )
1980

    
1981
  def ExpandNames(self):
1982
    _CheckOutputFields(static=self._FIELDS_STATIC,
1983
                       dynamic=self._FIELDS_DYNAMIC,
1984
                       selected=self.op.output_fields)
1985

    
1986
    self.needed_locks = {}
1987
    self.share_locks[locking.LEVEL_NODE] = 1
1988

    
1989
    if self.op.names:
1990
      self.wanted = _GetWantedNodes(self, self.op.names)
1991
    else:
1992
      self.wanted = locking.ALL_SET
1993

    
1994
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
1995
    self.do_locking = self.do_node_query and self.op.use_locking
1996
    if self.do_locking:
1997
      # if we don't request only static fields, we need to lock the nodes
1998
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
1999

    
2000

    
2001
  def CheckPrereq(self):
2002
    """Check prerequisites.
2003

2004
    """
2005
    # The validation of the node list is done in _GetWantedNodes if the
    # list is not empty; an empty list needs no validation
2007
    pass
2008

    
2009
  def Exec(self, feedback_fn):
2010
    """Computes the list of nodes and their attributes.
2011

2012
    """
2013
    all_info = self.cfg.GetAllNodesInfo()
2014
    if self.do_locking:
2015
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2016
    elif self.wanted != locking.ALL_SET:
2017
      nodenames = self.wanted
2018
      missing = set(nodenames).difference(all_info.keys())
2019
      if missing:
2020
        raise errors.OpExecError(
2021
          "Some nodes were removed before retrieving their data: %s" % missing)
2022
    else:
2023
      nodenames = all_info.keys()
2024

    
2025
    nodenames = utils.NiceSort(nodenames)
2026
    nodelist = [all_info[name] for name in nodenames]
2027

    
2028
    # begin data gathering
2029

    
2030
    if self.do_node_query:
2031
      live_data = {}
2032
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2033
                                          self.cfg.GetHypervisorType())
2034
      for name in nodenames:
2035
        nodeinfo = node_data[name]
2036
        if not nodeinfo.fail_msg and nodeinfo.payload:
2037
          nodeinfo = nodeinfo.payload
2038
          fn = utils.TryConvert
2039
          live_data[name] = {
2040
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2041
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2042
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2043
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2044
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2045
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2046
            "bootid": nodeinfo.get('bootid', None),
2047
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2048
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2049
            }
2050
        else:
2051
          live_data[name] = {}
2052
    else:
2053
      live_data = dict.fromkeys(nodenames, {})
2054

    
2055
    node_to_primary = dict([(name, set()) for name in nodenames])
2056
    node_to_secondary = dict([(name, set()) for name in nodenames])
2057

    
2058
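    # instance mappings are only computed if an instance-related field
    # was requested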
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2059
                             "sinst_cnt", "sinst_list"))
2060
    if inst_fields & frozenset(self.op.output_fields):
2061
      instancelist = self.cfg.GetInstanceList()
2062

    
2063
      for instance_name in instancelist:
2064
        inst = self.cfg.GetInstanceInfo(instance_name)
2065
        if inst.primary_node in node_to_primary:
2066
          node_to_primary[inst.primary_node].add(inst.name)
2067
        for secnode in inst.secondary_nodes:
2068
          if secnode in node_to_secondary:
2069
            node_to_secondary[secnode].add(inst.name)
2070

    
2071
    master_node = self.cfg.GetMasterNode()
2072

    
2073
    # end data gathering
2074

    
2075
    output = []
2076
    for node in nodelist:
2077
      node_output = []
2078
      for field in self.op.output_fields:
2079
        if field == "name":
2080
          val = node.name
2081
        elif field == "pinst_list":
2082
          val = list(node_to_primary[node.name])
2083
        elif field == "sinst_list":
2084
          val = list(node_to_secondary[node.name])
2085
        elif field == "pinst_cnt":
2086
          val = len(node_to_primary[node.name])
2087
        elif field == "sinst_cnt":
2088
          val = len(node_to_secondary[node.name])
2089
        elif field == "pip":
2090
          val = node.primary_ip
2091
        elif field == "sip":
2092
          val = node.secondary_ip
2093
        elif field == "tags":
2094
          val = list(node.GetTags())
2095
        elif field == "serial_no":
2096
          val = node.serial_no
2097
        elif field == "master_candidate":
2098
          val = node.master_candidate
2099
        elif field == "master":
2100
          val = node.name == master_node
2101
        elif field == "offline":
2102
          val = node.offline
2103
        elif field == "drained":
2104
          val = node.drained
2105
        elif self._FIELDS_DYNAMIC.Matches(field):
2106
          val = live_data[node.name].get(field, None)
2107
        else:
2108
          raise errors.ParameterError(field)
2109
        node_output.append(val)
2110
      output.append(node_output)
2111

    
2112
    return output
2113

    
2114

    
2115
class LUQueryNodeVolumes(NoHooksLU):
2116
  """Logical unit for getting volumes on node(s).
2117

2118
  """
2119
  _OP_REQP = ["nodes", "output_fields"]
2120
  REQ_BGL = False
2121
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2122
  _FIELDS_STATIC = utils.FieldSet("node")
2123

    
2124
  def ExpandNames(self):
2125
    _CheckOutputFields(static=self._FIELDS_STATIC,
2126
                       dynamic=self._FIELDS_DYNAMIC,
2127
                       selected=self.op.output_fields)
2128

    
2129
    self.needed_locks = {}
2130
    self.share_locks[locking.LEVEL_NODE] = 1
2131
    if not self.op.nodes:
2132
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2133
    else:
2134
      self.needed_locks[locking.LEVEL_NODE] = \
2135
        _GetWantedNodes(self, self.op.nodes)
2136

    
2137
  def CheckPrereq(self):
2138
    """Check prerequisites.
2139

2140
    This checks that the fields required are valid output fields.
2141

2142
    """
2143
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2144

    
2145
  def Exec(self, feedback_fn):
2146
    """Computes the list of nodes and their attributes.
2147

2148
    """
2149
    nodenames = self.nodes
2150
    volumes = self.rpc.call_node_volumes(nodenames)
2151

    
2152
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2153
             in self.cfg.GetInstanceList()]
2154

    
2155
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2156

    
2157
    output = []
2158
    for node in nodenames:
2159
      nresult = volumes[node]
2160
      if nresult.offline:
2161
        continue
2162
      msg = nresult.fail_msg
2163
      if msg:
2164
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2165
        continue
2166

    
2167
      node_vols = nresult.payload[:]
2168
      node_vols.sort(key=lambda vol: vol['dev'])
2169

    
2170
      for vol in node_vols:
2171
        node_output = []
2172
        for field in self.op.output_fields:
2173
          if field == "node":
2174
            val = node
2175
          elif field == "phys":
2176
            val = vol['dev']
2177
          elif field == "vg":
2178
            val = vol['vg']
2179
          elif field == "name":
2180
            val = vol['name']
2181
          elif field == "size":
2182
            val = int(float(vol['size']))
2183
          elif field == "instance":
2184
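            # find the instance owning this LV, or '-' if none does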
            for inst in ilist:
2185
              if node not in lv_by_node[inst]:
2186
                continue
2187
              if vol['name'] in lv_by_node[inst][node]:
2188
                val = inst.name
2189
                break
2190
            else:
2191
              val = '-'
2192
          else:
2193
            raise errors.ParameterError(field)
2194
          node_output.append(str(val))
2195

    
2196
        output.append(node_output)
2197

    
2198
    return output
2199

    
2200

    
2201
class LUAddNode(LogicalUnit):
2202
  """Logical unit for adding node to the cluster.
2203

2204
  """
2205
  HPATH = "node-add"
2206
  HTYPE = constants.HTYPE_NODE
2207
  _OP_REQP = ["node_name"]
2208

    
2209
  def BuildHooksEnv(self):
2210
    """Build hooks env.
2211

2212
    This will run on all nodes before, and on all nodes + the new node after.
2213

2214
    """
2215
    env = {
2216
      "OP_TARGET": self.op.node_name,
2217
      "NODE_NAME": self.op.node_name,
2218
      "NODE_PIP": self.op.primary_ip,
2219
      "NODE_SIP": self.op.secondary_ip,
2220
      }
2221
    nodes_0 = self.cfg.GetNodeList()
2222
    nodes_1 = nodes_0 + [self.op.node_name, ]
2223
    return env, nodes_0, nodes_1
2224

    
2225
  def CheckPrereq(self):
2226
    """Check prerequisites.
2227

2228
    This checks:
2229
     - the new node is not already in the config
2230
     - it is resolvable
2231
     - its parameters (single/dual homed) matches the cluster
2232

2233
    Any errors are signalled by raising errors.OpPrereqError.
2234

2235
    """
2236
    node_name = self.op.node_name
2237
    cfg = self.cfg
2238

    
2239
    dns_data = utils.HostInfo(node_name)
2240

    
2241
    node = dns_data.name
2242
    primary_ip = self.op.primary_ip = dns_data.ip
2243
    secondary_ip = getattr(self.op, "secondary_ip", None)
2244
    if secondary_ip is None:
2245
      secondary_ip = primary_ip
2246
    if not utils.IsValidIP(secondary_ip):
2247
      raise errors.OpPrereqError("Invalid secondary IP given")
2248
    self.op.secondary_ip = secondary_ip
2249

    
2250
    node_list = cfg.GetNodeList()
2251
    if not self.op.readd and node in node_list:
2252
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2253
                                 node)
2254
    elif self.op.readd and node not in node_list:
2255
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2256

    
2257
    for existing_node_name in node_list:
2258
      existing_node = cfg.GetNodeInfo(existing_node_name)
2259

    
2260
      if self.op.readd and node == existing_node_name:
2261
        if (existing_node.primary_ip != primary_ip or
2262
            existing_node.secondary_ip != secondary_ip):
2263
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2264
                                     " address configuration as before")
2265
        continue
2266

    
2267
      if (existing_node.primary_ip == primary_ip or
2268
          existing_node.secondary_ip == primary_ip or
2269
          existing_node.primary_ip == secondary_ip or
2270
          existing_node.secondary_ip == secondary_ip):
2271
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2272
                                   " existing node %s" % existing_node.name)
2273

    
2274
    # check that the type of the node (single versus dual homed) is the
2275
    # same as for the master
2276
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2277
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2278
    newbie_singlehomed = secondary_ip == primary_ip
2279
    if master_singlehomed != newbie_singlehomed:
2280
      if master_singlehomed:
2281
        raise errors.OpPrereqError("The master has no private ip but the"
2282
                                   " new node has one")
2283
      else:
2284
        raise errors.OpPrereqError("The master has a private ip but the"
2285
                                   " new node doesn't have one")
2286

    
2287
    # checks reachability
2288
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2289
      raise errors.OpPrereqError("Node not reachable by ping")
2290

    
2291
    if not newbie_singlehomed:
2292
      # check reachability from my secondary ip to newbie's secondary ip
2293
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2294
                           source=myself.secondary_ip):
2295
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2296
                                   " based ping to noded port")
2297

    
2298
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2299
    mc_now, _ = self.cfg.GetMasterCandidateStats()
2300
    master_candidate = mc_now < cp_size
2301

    
2302
    self.new_node = objects.Node(name=node,
2303
                                 primary_ip=primary_ip,
2304
                                 secondary_ip=secondary_ip,
2305
                                 master_candidate=master_candidate,
2306
                                 offline=False, drained=False)
2307

    
2308
  def Exec(self, feedback_fn):
2309
    """Adds the new node to the cluster.
2310

2311
    """
2312
    new_node = self.new_node
2313
    node = new_node.name
2314

    
2315
    # check connectivity
2316
    result = self.rpc.call_version([node])[node]
2317
    result.Raise("Can't get version information from node %s" % node)
2318
    if constants.PROTOCOL_VERSION == result.payload:
2319
      logging.info("Communication to node %s fine, sw version %s match",
2320
                   node, result.payload)
2321
    else:
2322
      raise errors.OpExecError("Version mismatch master version %s,"
2323
                               " node version %s" %
2324
                               (constants.PROTOCOL_VERSION, result.payload))
2325

    
2326
    # setup ssh on node
2327
    logging.info("Copy ssh key to node %s", node)
2328
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2329
    keyarray = []
2330
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2331
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2332
                priv_key, pub_key]
2333

    
2334
    for i in keyfiles:
2335
      f = open(i, 'r')
2336
      try:
2337
        keyarray.append(f.read())
2338
      finally:
2339
        f.close()
2340

    
2341
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2342
                                    keyarray[2],
2343
                                    keyarray[3], keyarray[4], keyarray[5])
2344
    result.Raise("Cannot transfer ssh keys to the new node")
2345

    
2346
    # Add node to our /etc/hosts, and add key to known_hosts
2347
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2348
      utils.AddHostToEtcHosts(new_node.name)
2349

    
2350
    if new_node.secondary_ip != new_node.primary_ip:
2351
      result = self.rpc.call_node_has_ip_address(new_node.name,
2352
                                                 new_node.secondary_ip)
2353
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2354
                   prereq=True)
2355
      if not result.payload:
2356
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2357
                                 " you gave (%s). Please fix and re-run this"
2358
                                 " command." % new_node.secondary_ip)
2359

    
2360
    node_verify_list = [self.cfg.GetMasterNode()]
2361
    node_verify_param = {
2362
      'nodelist': [node],
2363
      # TODO: do a node-net-test as well?
2364
    }
2365

    
2366
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2367
                                       self.cfg.GetClusterName())
2368
    for verifier in node_verify_list:
2369
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2370
      nl_payload = result[verifier].payload['nodelist']
2371
      if nl_payload:
2372
        for failed in nl_payload:
2373
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2374
                      (verifier, nl_payload[failed]))
2375
        raise errors.OpExecError("ssh/hostname verification failed.")
2376

    
2377
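    # on readd the node is already in the configuration, so it receives
    # the ancillary files without being listed as an additional node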
    if self.op.readd:
2378
      _RedistributeAncillaryFiles(self)
2379
      self.context.ReaddNode(new_node)
2380
    else:
2381
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2382
      self.context.AddNode(new_node)
2383

    
2384

    
2385
class LUSetNodeParams(LogicalUnit):
2386
  """Modifies the parameters of a node.
2387

2388
  """
2389
  HPATH = "node-modify"
2390
  HTYPE = constants.HTYPE_NODE
2391
  _OP_REQP = ["node_name"]
2392
  REQ_BGL = False
2393

    
2394
  def CheckArguments(self):
2395
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2396
    if node_name is None:
2397
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2398
    self.op.node_name = node_name
2399
    _CheckBooleanOpField(self.op, 'master_candidate')
2400
    _CheckBooleanOpField(self.op, 'offline')
2401
    _CheckBooleanOpField(self.op, 'drained')
2402
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2403
    if all_mods.count(None) == 3:
2404
      raise errors.OpPrereqError("Please pass at least one modification")
2405
    if all_mods.count(True) > 1:
2406
      raise errors.OpPrereqError("Can't set the node into more than one"
2407
                                 " state at the same time")
2408

    
2409
  def ExpandNames(self):
2410
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2411

    
2412
  def BuildHooksEnv(self):
2413
    """Build hooks env.
2414

2415
    This runs on the master node.
2416

2417
    """
2418
    env = {
2419
      "OP_TARGET": self.op.node_name,
2420
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2421
      "OFFLINE": str(self.op.offline),
2422
      "DRAINED": str(self.op.drained),
2423
      }
2424
    nl = [self.cfg.GetMasterNode(),
2425
          self.op.node_name]
2426
    return env, nl, nl
2427

    
2428
  def CheckPrereq(self):
2429
    """Check prerequisites.
2430

2431
    This checks the requested node state changes against the current
    node state and the cluster's master candidate constraints.
2432

2433
    """
2434
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2435

    
2436
    if ((self.op.master_candidate == False or self.op.offline == True or
2437
         self.op.drained == True) and node.master_candidate):
2438
      # we will demote the node from master_candidate
2439
      if self.op.node_name == self.cfg.GetMasterNode():
2440
        raise errors.OpPrereqError("The master node has to be a"
2441
                                   " master candidate, online and not drained")
2442
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2443
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2444
      if num_candidates <= cp_size:
2445
        msg = ("Not enough master candidates (desired"
2446
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2447
        if self.op.force:
2448
          self.LogWarning(msg)
2449
        else:
2450
          raise errors.OpPrereqError(msg)
2451

    
2452
    if (self.op.master_candidate == True and
2453
        ((node.offline and not self.op.offline == False) or
2454
         (node.drained and not self.op.drained == False))):
2455
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2456
                                 " to master_candidate" % node.name)
2457

    
2458
    return
2459

    
2460
  def Exec(self, feedback_fn):
2461
    """Modifies a node.
2462

2463
    """
2464
    node = self.node
2465

    
2466
    result = []
2467
    changed_mc = False
2468

    
2469
    if self.op.offline is not None:
2470
      node.offline = self.op.offline
2471
      result.append(("offline", str(self.op.offline)))
2472
      if self.op.offline == True:
2473
        if node.master_candidate:
2474
          node.master_candidate = False
2475
          changed_mc = True
2476
          result.append(("master_candidate", "auto-demotion due to offline"))
2477
        if node.drained:
2478
          node.drained = False
2479
          result.append(("drained", "clear drained status due to offline"))
2480

    
2481
    if self.op.master_candidate is not None:
2482
      node.master_candidate = self.op.master_candidate
2483
      changed_mc = True
2484
      result.append(("master_candidate", str(self.op.master_candidate)))
2485
      if self.op.master_candidate == False:
2486
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2487
        msg = rrc.fail_msg
2488
        if msg:
2489
          self.LogWarning("Node failed to demote itself: %s" % msg)
2490

    
2491
    if self.op.drained is not None:
2492
      node.drained = self.op.drained
2493
      result.append(("drained", str(self.op.drained)))
2494
      if self.op.drained == True:
2495
        if node.master_candidate:
2496
          node.master_candidate = False
2497
          changed_mc = True
2498
          result.append(("master_candidate", "auto-demotion due to drain"))
2499
        if node.offline:
2500
          node.offline = False
2501
          result.append(("offline", "clear offline status due to drain"))
2502

    
2503
    # this will trigger configuration file update, if needed
2504
    self.cfg.Update(node)
2505
    # this will trigger job queue propagation or cleanup
2506
    if changed_mc:
2507
      self.context.ReaddNode(node)
2508

    
2509
    return result
2510

    
2511

    
2512
class LUPowercycleNode(NoHooksLU):
2513
  """Powercycles a node.
2514

2515
  """
2516
  _OP_REQP = ["node_name", "force"]
2517
  REQ_BGL = False
2518

    
2519
  def CheckArguments(self):
2520
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2521
    if node_name is None:
2522
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2523
    self.op.node_name = node_name
2524
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2525
      raise errors.OpPrereqError("The node is the master and the force"
2526
                                 " parameter was not set")
2527

    
2528
  def ExpandNames(self):
2529
    """Locking for PowercycleNode.
2530

2531
    This is a last-resort option and shouldn't block on other
2532
    jobs. Therefore, we grab no locks.
2533

2534
    """
2535
    self.needed_locks = {}
2536

    
2537
  def CheckPrereq(self):
2538
    """Check prerequisites.
2539

2540
    This LU has no prereqs.
2541

2542
    """
2543
    pass
2544

    
2545
  def Exec(self, feedback_fn):
2546
    """Reboots a node.
2547

2548
    """
2549
    result = self.rpc.call_node_powercycle(self.op.node_name,
2550
                                           self.cfg.GetHypervisorType())
2551
    result.Raise("Failed to schedule the reboot")
2552
    return result.payload
2553

    
2554

    
2555
class LUQueryClusterInfo(NoHooksLU):
2556
  """Query cluster configuration.
2557

2558
  """
2559
  _OP_REQP = []
2560
  REQ_BGL = False
2561

    
2562
  def ExpandNames(self):
2563
    self.needed_locks = {}
2564

    
2565
  def CheckPrereq(self):
2566
    """No prerequsites needed for this LU.
2567

2568
    """
2569
    pass
2570

    
2571
  def Exec(self, feedback_fn):
2572
    """Return cluster config.
2573

2574
    """
2575
    cluster = self.cfg.GetClusterInfo()
2576
    result = {
2577
      "software_version": constants.RELEASE_VERSION,
2578
      "protocol_version": constants.PROTOCOL_VERSION,
2579
      "config_version": constants.CONFIG_VERSION,
2580
      "os_api_version": constants.OS_API_VERSION,
2581
      "export_version": constants.EXPORT_VERSION,
2582
      "architecture": (platform.architecture()[0], platform.machine()),
2583
      "name": cluster.cluster_name,
2584
      "master": cluster.master_node,
2585
      "default_hypervisor": cluster.default_hypervisor,
2586
      "enabled_hypervisors": cluster.enabled_hypervisors,
2587
      "hvparams": dict([(hvname, cluster.hvparams[hvname])
2588
                        for hvname in cluster.enabled_hypervisors]),
2589
      "beparams": cluster.beparams,
2590
      "nicparams": cluster.nicparams,
2591
      "candidate_pool_size": cluster.candidate_pool_size,
2592
      "master_netdev": cluster.master_netdev,
2593
      "volume_group_name": cluster.volume_group_name,
2594
      "file_storage_dir": cluster.file_storage_dir,
2595
      }
2596

    
2597
    return result
2598

    
2599

    
2600
class LUQueryConfigValues(NoHooksLU):
2601
  """Return configuration values.
2602

2603
  """
2604
  _OP_REQP = []
2605
  REQ_BGL = False
2606
  _FIELDS_DYNAMIC = utils.FieldSet()
2607
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
2608

    
2609
  def ExpandNames(self):
2610
    self.needed_locks = {}
2611

    
2612
    _CheckOutputFields(static=self._FIELDS_STATIC,
2613
                       dynamic=self._FIELDS_DYNAMIC,
2614
                       selected=self.op.output_fields)
2615

    
2616
  def CheckPrereq(self):
2617
    """No prerequisites.
2618

2619
    """
2620
    pass
2621

    
2622
  def Exec(self, feedback_fn):
2623
    """Dump a representation of the cluster config to the standard output.
2624

2625
    """
2626
    values = []
2627
    for field in self.op.output_fields:
2628
      if field == "cluster_name":
2629
        entry = self.cfg.GetClusterName()
2630
      elif field == "master_node":
2631
        entry = self.cfg.GetMasterNode()
2632
      elif field == "drain_flag":
2633
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
2634
      else:
2635
        raise errors.ParameterError(field)
2636
      values.append(entry)
2637
    return values
2638

    
2639

    
2640
class LUActivateInstanceDisks(NoHooksLU):
2641
  """Bring up an instance's disks.
2642

2643
  """
2644
  _OP_REQP = ["instance_name"]
2645
  REQ_BGL = False
2646

    
2647
  def ExpandNames(self):
2648
    self._ExpandAndLockInstance()
2649
    self.needed_locks[locking.LEVEL_NODE] = []
2650
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2651

    
2652
  def DeclareLocks(self, level):
2653
    if level == locking.LEVEL_NODE:
2654
      self._LockInstancesNodes()
2655

    
2656
  def CheckPrereq(self):
2657
    """Check prerequisites.
2658

2659
    This checks that the instance is in the cluster.
2660

2661
    """
2662
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2663
    assert self.instance is not None, \
2664
      "Cannot retrieve locked instance %s" % self.op.instance_name
2665
    _CheckNodeOnline(self, self.instance.primary_node)
2666

    
2667
  def Exec(self, feedback_fn):
2668
    """Activate the disks.
2669

2670
    """
2671
    disks_ok, disks_info = _AssembleInstanceDisks(self, self.instance)
2672
    if not disks_ok:
2673
      raise errors.OpExecError("Cannot activate block devices")
2674

    
2675
    return disks_info
2676

    
2677

    
2678
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False):
2679
  """Prepare the block devices for an instance.
2680

2681
  This sets up the block devices on all nodes.
2682

2683
  @type lu: L{LogicalUnit}
2684
  @param lu: the logical unit on whose behalf we execute
2685
  @type instance: L{objects.Instance}
2686
  @param instance: the instance for whose disks we assemble
2687
  @type ignore_secondaries: boolean
2688
  @param ignore_secondaries: if true, errors on secondary nodes
2689
      won't result in an error return from the function
2690
  @return: False if the operation failed, otherwise a list of
2691
      (host, instance_visible_name, node_visible_name)
2692
      with the mapping from node devices to instance devices
2693

2694
  """
2695
  device_info = []
2696
  disks_ok = True
2697
  iname = instance.name
2698
  # With the two passes mechanism we try to reduce the window of
2699
  # opportunity for the race condition of switching DRBD to primary
2700
  # before handshaking occurred, but we do not eliminate it
2701

    
2702
  # The proper fix would be to wait (with some limits) until the
2703
  # connection has been made and drbd transitions from WFConnection
2704
  # into any other network-connected state (Connected, SyncTarget,
2705
  # SyncSource, etc.)
2706

    
2707
  # 1st pass, assemble on all nodes in secondary mode
2708
  for inst_disk in instance.disks:
2709
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2710
      lu.cfg.SetDiskID(node_disk, node)
2711
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
2712
      msg = result.fail_msg
2713
      if msg:
2714
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2715
                           " (is_primary=False, pass=1): %s",
2716
                           inst_disk.iv_name, node, msg)
2717
        if not ignore_secondaries:
2718
          disks_ok = False
2719

    
2720
  # FIXME: race condition on drbd migration to primary
2721

    
2722
  # 2nd pass, do only the primary node
2723
  for inst_disk in instance.disks:
2724
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
2725
      if node != instance.primary_node:
2726
        continue
2727
      lu.cfg.SetDiskID(node_disk, node)
2728
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
2729
      msg = result.fail_msg
2730
      if msg:
2731
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
2732
                           " (is_primary=True, pass=2): %s",
2733
                           inst_disk.iv_name, node, msg)
2734
        disks_ok = False
2735
    device_info.append((instance.primary_node, inst_disk.iv_name,
2736
                        result.payload))
2737

    
2738
  # leave the disks configured for the primary node
2739
  # this is a workaround that would be fixed better by
2740
  # improving the logical/physical id handling
2741
  for disk in instance.disks:
2742
    lu.cfg.SetDiskID(disk, instance.primary_node)
2743

    
2744
  return disks_ok, device_info
2745

    
2746

    
2747
def _StartInstanceDisks(lu, instance, force):
2748
  """Start the disks of an instance.
2749

2750
  """
2751
  disks_ok, dummy = _AssembleInstanceDisks(lu, instance,
2752
                                           ignore_secondaries=force)
2753
  if not disks_ok:
2754
    _ShutdownInstanceDisks(lu, instance)
2755
    if force is not None and not force:
2756
      lu.proc.LogWarning("", hint="If the message above refers to a"
2757
                         " secondary node,"
2758
                         " you can retry the operation using '--force'.")
2759
    raise errors.OpExecError("Disk consistency error")
2760

    
2761

    
2762
class LUDeactivateInstanceDisks(NoHooksLU):
2763
  """Shutdown an instance's disks.
2764

2765
  """
2766
  _OP_REQP = ["instance_name"]
2767
  REQ_BGL = False
2768

    
2769
  def ExpandNames(self):
2770
    self._ExpandAndLockInstance()
2771
    self.needed_locks[locking.LEVEL_NODE] = []
2772
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2773

    
2774
  def DeclareLocks(self, level):
2775
    if level == locking.LEVEL_NODE:
2776
      self._LockInstancesNodes()
2777

    
2778
  def CheckPrereq(self):
2779
    """Check prerequisites.
2780

2781
    This checks that the instance is in the cluster.
2782

2783
    """
2784
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
2785
    assert self.instance is not None, \
2786
      "Cannot retrieve locked instance %s" % self.op.instance_name
2787

    
2788
  def Exec(self, feedback_fn):
2789
    """Deactivate the disks
2790

2791
    """
2792
    instance = self.instance
2793
    _SafeShutdownInstanceDisks(self, instance)
2794

    
2795

    
2796
def _SafeShutdownInstanceDisks(lu, instance):
2797
  """Shutdown block devices of an instance.
2798

2799
  This function checks if an instance is running, before calling
2800
  _ShutdownInstanceDisks.
2801

2802
  """
2803
  pnode = instance.primary_node
2804
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
2805
  ins_l.Raise("Can't contact node %s" % pnode)
2806

    
2807
  if instance.name in ins_l.payload:
2808
    raise errors.OpExecError("Instance is running, can't shutdown"
2809
                             " block devices.")
2810

    
2811
  _ShutdownInstanceDisks(lu, instance)
2812

    
2813

    
2814
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Errors on the primary node are ignored only if ignore_primary is
  true.

  """
  all_result = True
  for disk in instance.disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if msg:
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                      disk.iv_name, node, msg)
        if not ignore_primary or node != instance.primary_node:
          all_result = False
  return all_result
2835

    
2836

    
2837
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
2838
  """Checks if a node has enough free memory.
2839

2840
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
2844

2845
  @type lu: C{LogicalUnit}
2846
  @param lu: a logical unit from which we get configuration data
2847
  @type node: C{str}
2848
  @param node: the node to check
2849
  @type reason: C{str}
2850
  @param reason: string to use in the error message
2851
  @type requested: C{int}
2852
  @param requested: the amount of memory in MiB to check for
2853
  @type hypervisor_name: C{str}
2854
  @param hypervisor_name: the hypervisor to ask for memory stats
2855
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
2856
      we cannot check the node
2857

2858
  """
2859
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
2860
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
2861
  free_mem = nodeinfo[node].payload.get('memory_free', None)
2862
  if not isinstance(free_mem, int):
2863
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
2864
                               " was '%s'" % (node, free_mem))
2865
  if requested > free_mem:
2866
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
2867
                               " needed %s MiB, available %s MiB" %
2868
                               (node, reason, requested, free_mem))
2869

    
2870

    
2871
class LUStartupInstance(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "force"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "FORCE": self.op.force,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra beparams
    self.beparams = getattr(self.op, "beparams", {})
    if self.beparams:
      if not isinstance(self.beparams, dict):
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
                                   " dict" % (type(self.beparams), ))
      # fill the beparams dict
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
      self.op.beparams = self.beparams

    # extra hvparams
    self.hvparams = getattr(self.op, "hvparams", {})
    if self.hvparams:
      if not isinstance(self.hvparams, dict):
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
                                   " dict" % (type(self.hvparams), ))

      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
                                    instance.hvparams)
      filled_hvp.update(self.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
      self.op.hvparams = self.hvparams

    _CheckNodeOnline(self, instance.primary_node)

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if not remote_info.payload: # not running already
      _CheckNodeFreeMemory(self, instance.primary_node,
                           "starting instance %s" % instance.name,
                           bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    node_current = instance.primary_node

    _StartInstanceDisks(self, instance, force)

    result = self.rpc.call_instance_start(node_current, instance,
                                          self.hvparams, self.beparams)
    msg = result.fail_msg
    if msg:
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
  REQ_BGL = False

  def ExpandNames(self):
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL]:
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
                                  (constants.INSTANCE_REBOOT_SOFT,
                                   constants.INSTANCE_REBOOT_HARD,
                                   constants.INSTANCE_REBOOT_FULL))
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    node_current = instance.primary_node

    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   instance.primary_node)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name), prereq=True)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))


  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


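# New-name validation sketch (hedged, mirroring LURenameInstance.CheckPrereq):
#
#   name_info = utils.HostInfo(self.op.new_name)   # resolve the new name
#   new_name = name_info.name                      # fully-qualified form
#   if new_name in self.cfg.GetInstanceList():     # reject duplicates
#     raise errors.OpPrereqError(...)
#   if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
#     raise errors.OpPrereqError(...)              # unless ignore_ip is set
#
# Only after these checks pass does Exec() update the configuration, swap the
# instance lock and run the OS rename script on the primary node.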
class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "serial_no", "hypervisor", "hvparams",] +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.failed or result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "serial_no":
          val = instance.serial_no
        elif field == "network_port":
          val = instance.network_port
        elif field == "hypervisor":
          val = instance.hypervisor
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, "Unhandled variable parameter"
        else:
          raise errors.ParameterError(field)
        iout.append(val)
      output.append(iout)

    return output


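# Field-matching sketch (hedged): the parametrised entries in _FIELDS_STATIC
# above are regular expressions, so a query field like "disk.size/0" is
# resolved through its match groups, roughly:
#
#   import re
#   m = re.match(r"^(disk)\.(size)/([0-9]+)$", "disk.size/0")
#   if m:
#     kind, attr, idx = m.groups()   # ("disk", "size", "0")
#     # Exec() above then looks up instance.FindDisk(idx).size for this field
#
# utils.FieldSet.Matches() is assumed here to perform this kind of anchored
# regex match and return the match object used as st_match in Exec().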
class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, dummy = _AssembleInstanceDisks(self, instance,
                                               ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.op.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.LogWarning("Migration failed and I can't reconnect the"
                      " drives: error '%s'\n"
                      "Please look and recover the instance status" %
                      str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.op.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }
    if self.op.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


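# Migration sequence sketch (hedged recap of _ExecMigration above):
#
#   _EnsureSecondary(target) -> _GoStandalone() -> _GoReconnect(True)
#       -> _WaitUntilSync()            # disks now dual-master and in sync
#   call_accept_instance(target)       # prepare the target hypervisor
#   call_instance_migrate(source)      # actual (optionally live) migration
#   cfg.Update(instance)               # primary_node switched to target
#   call_finalize_migration(target)
#   _EnsureSecondary(source) -> _WaitUntilSync() -> _GoStandalone()
#       -> _GoReconnect(False) -> _WaitUntilSync()   # back to single-master
#
# _ExecCleanup() runs a reduced version of the tail of this sequence when a
# previous migration attempt left the disks in an inconsistent state.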
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


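# Recursion sketch (hedged): for a DRBD8 disk the tree is the DRBD device with
# two LV children (data and metadata). _CreateBlockDev() walks that tree
# depth-first; force_create flips to True as soon as a device reports
# CreateOnSecondary(), so the LVs backing a DRBD device get created on both
# nodes, while a plain LV on a non-forced call is skipped. A caller would
# invoke it roughly as:
#
#   _CreateBlockDev(lu, node, instance, drbd_dev,
#                   force_create=False, info=info, force_open=f_create)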
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


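# Resulting object sketch (hedged): the disk returned above is a small tree
# whose logical_id encodes everything needed to assemble the DRBD device:
#
#   drbd_dev.logical_id == (primary, secondary, port, p_minor, s_minor,
#                           shared_secret)
#   drbd_dev.children   == [<data LV: (vgname, names[0])>,
#                           <metadata LV: (vgname, names[1]), size=128>]
#
# The port, minors and shared secret are allocated through the cluster
# configuration (AllocatePort, AllocateDRBDMinor, GenerateDRBDSecret).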
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % i
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % i
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
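
# Usage sketch (hypothetical instance and node names): for the plain LVM
# template with two disks, a call like the one below returns two LD_LV Disk
# objects named "disk/0" and "disk/1" in the cluster's volume group:
#
#   disks = _GenerateDiskTemplate(lu, constants.DT_PLAIN,
#                                 "instance1.example.com",
#                                 "node1.example.com", [],
#                                 [{"size": 1024, "mode": constants.DISK_RDWR},
#                                  {"size": 2048, "mode": constants.DISK_RDWR}],
#                                 None, None, 0)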


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @raise errors.OpExecError: if any of the disks cannot be created

  """
  info = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for device in instance.disks:
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    msg = result.fail_msg
    if msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
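
# Worked example (hypothetical sizes): for two disks of 1024 MB and 2048 MB,
# DT_PLAIN needs 1024 + 2048 = 3072 MB free in the volume group, while
# DT_DRBD8 needs (1024 + 128) + (2048 + 128) = 3328 MB because of the
# per-disk DRBD metadata; DT_DISKLESS and DT_FILE need no LVM space at all:
#
#   _ComputeDiskSize(constants.DT_PLAIN, [{"size": 1024}, {"size": 2048}])
#   # -> 3072
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 2048}])
#   # -> 3328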


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
4440
    """ExpandNames for CreateInstance.
4441

4442
    Figure out the right locks for instance creation.
4443

4444
    """
4445
    self.needed_locks = {}
4446

    
4447
    # set optional parameters to none if they don't exist
4448
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
4449
      if not hasattr(self.op, attr):
4450
        setattr(self.op, attr, None)
4451

    
4452
    # cheap checks, mostly valid constants given
4453

    
4454
    # verify creation mode
4455
    if self.op.mode not in (constants.INSTANCE_CREATE,
4456
                            constants.INSTANCE_IMPORT):
4457
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
4458
                                 self.op.mode)
4459

    
4460
    # disk template and mirror node verification
4461
    if self.op.disk_template not in constants.DISK_TEMPLATES:
4462
      raise errors.OpPrereqError("Invalid disk template name")
4463

    
4464
    if self.op.hypervisor is None:
4465
      self.op.hypervisor = self.cfg.GetHypervisorType()
4466

    
4467
    cluster = self.cfg.GetClusterInfo()
4468
    enabled_hvs = cluster.enabled_hypervisors
4469
    if self.op.hypervisor not in enabled_hvs:
4470
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
4471
                                 " cluster (%s)" % (self.op.hypervisor,
4472
                                  ",".join(enabled_hvs)))
4473

    
4474
    # check hypervisor parameter syntax (locally)
4475
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4476
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
4477
                                  self.op.hvparams)
4478
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
4479
    hv_type.CheckParameterSyntax(filled_hvp)
4480
    self.hv_full = filled_hvp
4481

    
4482
    # fill and remember the beparams dict
4483
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4484
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
4485
                                    self.op.beparams)
4486

    
4487
    #### instance parameters check
4488

    
4489
    # instance name verification
4490
    hostname1 = utils.HostInfo(self.op.instance_name)
4491
    self.op.instance_name = instance_name = hostname1.name
4492

    
4493
    # this is just a preventive check, but someone might still add this
4494
    # instance in the meantime, and creation will fail at lock-add time
4495
    if instance_name in self.cfg.GetInstanceList():
4496
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4497
                                 instance_name)
4498

    
4499
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
4500

    
4501
    # NIC buildup
4502
    self.nics = []
4503
    for idx, nic in enumerate(self.op.nics):
4504
      nic_mode_req = nic.get("mode", None)
4505
      nic_mode = nic_mode_req
4506
      if nic_mode is None:
4507
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
4508

    
4509
      # in routed mode, for the first nic, the default ip is 'auto'
4510
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
4511
        default_ip_mode = constants.VALUE_AUTO
4512
      else:
4513
        default_ip_mode = constants.VALUE_NONE
4514

    
4515
      # ip validity checks
4516
      ip = nic.get("ip", default_ip_mode)
4517
      if ip is None or ip.lower() == constants.VALUE_NONE:
4518
        nic_ip = None
4519
      elif ip.lower() == constants.VALUE_AUTO:
4520
        nic_ip = hostname1.ip
4521
      else:
4522
        if not utils.IsValidIP(ip):
4523
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
4524
                                     " like a valid IP" % ip)
4525
        nic_ip = ip
4526

    
4527
      # TODO: check the ip for uniqueness !!
4528
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
4529
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
4530

    
4531
      # MAC address verification
4532
      mac = nic.get("mac", constants.VALUE_AUTO)
4533
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4534
        if not utils.IsValidMac(mac.lower()):
4535
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
4536
                                     mac)
4537
      # bridge verification
4538
      bridge = nic.get("bridge", None)
4539
      link = nic.get("link", None)
4540
      if bridge and link:
4541
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
4542
                                   " at the same time")
4543
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
4544
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
4545
      elif bridge:
4546
        link = bridge
4547

    
4548
      nicparams = {}
4549
      if nic_mode_req:
4550
        nicparams[constants.NIC_MODE] = nic_mode_req
4551
      if link:
4552
        nicparams[constants.NIC_LINK] = link
4553

    
4554
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4555
                                      nicparams)
4556
      objects.NIC.CheckParameterSyntax(check_params)
4557
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
4558

    
4559
    # disk checks/pre-build
4560
    self.disks = []
4561
    for disk in self.op.disks:
4562
      mode = disk.get("mode", constants.DISK_RDWR)
4563
      if mode not in constants.DISK_ACCESS_SET:
4564
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
4565
                                   mode)
4566
      size = disk.get("size", None)
4567
      if size is None:
4568
        raise errors.OpPrereqError("Missing disk size")
4569
      try:
4570
        size = int(size)
4571
      except ValueError:
4572
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
4573
      self.disks.append({"size": size, "mode": mode})
4574

    
4575
    # used in CheckPrereq for ip ping check
4576
    self.check_ip = hostname1.ip
4577

    
4578
    # file storage checks
4579
    if (self.op.file_driver and
4580
        not self.op.file_driver in constants.FILE_DRIVER):
4581
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
4582
                                 self.op.file_driver)
4583

    
4584
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
4585
      raise errors.OpPrereqError("File storage directory path not absolute")
4586

    
4587
    ### Node/iallocator related checks
4588
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
4589
      raise errors.OpPrereqError("One and only one of iallocator and primary"
4590
                                 " node must be given")
4591

    
4592
    if self.op.iallocator:
4593
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4594
    else:
4595
      self.op.pnode = self._ExpandNode(self.op.pnode)
4596
      nodelist = [self.op.pnode]
4597
      if self.op.snode is not None:
4598
        self.op.snode = self._ExpandNode(self.op.snode)
4599
        nodelist.append(self.op.snode)
4600
      self.needed_locks[locking.LEVEL_NODE] = nodelist
4601

    
4602
    # in case of import lock the source node too
4603
    if self.op.mode == constants.INSTANCE_IMPORT:
4604
      src_node = getattr(self.op, "src_node", None)
4605
      src_path = getattr(self.op, "src_path", None)
4606

    
4607
      if src_path is None:
4608
        self.op.src_path = src_path = self.op.instance_name
4609

    
4610
      if src_node is None:
4611
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4612
        self.op.src_node = None
4613
        if os.path.isabs(src_path):
4614
          raise errors.OpPrereqError("Importing an instance from an absolute"
4615
                                     " path requires a source node option.")
4616
      else:
4617
        self.op.src_node = src_node = self._ExpandNode(src_node)
4618
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4619
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
4620
        if not os.path.isabs(src_path):
4621
          self.op.src_path = src_path = \
4622
            os.path.join(constants.EXPORT_DIR, src_path)
4623

    
4624
    else: # INSTANCE_CREATE
4625
      if getattr(self.op, "os_type", None) is None:
4626
        raise errors.OpPrereqError("No guest OS specified")
4627

    
4628
  def _RunAllocator(self):
4629
    """Run the allocator based on input opcode.
4630

4631
    """
4632
    nics = [n.ToDict() for n in self.nics]
4633
    ial = IAllocator(self,
4634
                     mode=constants.IALLOCATOR_MODE_ALLOC,
4635
                     name=self.op.instance_name,
4636
                     disk_template=self.op.disk_template,
4637
                     tags=[],
4638
                     os=self.op.os_type,
4639
                     vcpus=self.be_full[constants.BE_VCPUS],
4640
                     mem_size=self.be_full[constants.BE_MEMORY],
4641
                     disks=self.disks,
4642
                     nics=nics,
4643
                     hypervisor=self.op.hypervisor,
4644
                     )
4645

    
4646
    ial.Run(self.op.iallocator)
4647

    
4648
    if not ial.success:
4649
      raise errors.OpPrereqError("Can't compute nodes using"
4650
                                 " iallocator '%s': %s" % (self.op.iallocator,
4651
                                                           ial.info))
4652
    if len(ial.nodes) != ial.required_nodes:
4653
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
4654
                                 " of nodes (%s), required %s" %
4655
                                 (self.op.iallocator, len(ial.nodes),
4656
                                  ial.required_nodes))
4657
    self.op.pnode = ial.nodes[0]
4658
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
4659
                 self.op.instance_name, self.op.iallocator,
4660
                 ", ".join(ial.nodes))
4661
    if ial.required_nodes == 2:
4662
      self.op.snode = ial.nodes[1]
4663

    
4664
  def BuildHooksEnv(self):
4665
    """Build hooks env.
4666

4667
    This runs on master, primary and secondary nodes of the instance.
4668

4669
    """
4670
    env = {
4671
      "ADD_MODE": self.op.mode,
4672
      }
4673
    if self.op.mode == constants.INSTANCE_IMPORT:
4674
      env["SRC_NODE"] = self.op.src_node
4675
      env["SRC_PATH"] = self.op.src_path
4676
      env["SRC_IMAGES"] = self.src_images
4677

    
4678
    env.update(_BuildInstanceHookEnv(
4679
      name=self.op.instance_name,
4680
      primary_node=self.op.pnode,
4681
      secondary_nodes=self.secondaries,
4682
      status=self.op.start,
4683
      os_type=self.op.os_type,
4684
      memory=self.be_full[constants.BE_MEMORY],
4685
      vcpus=self.be_full[constants.BE_VCPUS],
4686
      nics=_NICListToTuple(self, self.nics),
4687
      disk_template=self.op.disk_template,
4688
      disks=[(d["size"], d["mode"]) for d in self.disks],
4689
      bep=self.be_full,
4690
      hvp=self.hv_full,
4691
      hypervisor=self.op.hypervisor,
4692
    ))
4693

    
4694
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
4695
          self.secondaries)
4696
    return env, nl, nl
4697

    
4698

    
4699
  def CheckPrereq(self):
4700
    """Check prerequisites.
4701

4702
    """
4703
    if (not self.cfg.GetVGName() and
4704
        self.op.disk_template not in constants.DTS_NOT_LVM):
4705
      raise errors.OpPrereqError("Cluster does not support lvm-based"
4706
                                 " instances")
4707

    
4708
    if self.op.mode == constants.INSTANCE_IMPORT:
4709
      src_node = self.op.src_node
4710
      src_path = self.op.src_path
4711

    
4712
      if src_node is None:
4713
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
4714
        exp_list = self.rpc.call_export_list(locked_nodes)
4715
        found = False
4716
        for node in exp_list:
4717
          if exp_list[node].fail_msg:
4718
            continue
4719
          if src_path in exp_list[node].payload:
4720
            found = True
4721
            self.op.src_node = src_node = node
4722
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
4723
                                                       src_path)
4724
            break
4725
        if not found:
4726
          raise errors.OpPrereqError("No export found for relative path %s" %
4727
                                      src_path)
4728

    
4729
      _CheckNodeOnline(self, src_node)
4730
      result = self.rpc.call_export_info(src_node, src_path)
4731
      result.Raise("No export or invalid export found in dir %s" % src_path)
4732

    
4733
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
4734
      if not export_info.has_section(constants.INISECT_EXP):
4735
        raise errors.ProgrammerError("Corrupted export config")
4736

    
4737
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
4738
      if (int(ei_version) != constants.EXPORT_VERSION):
4739
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
4740
                                   (ei_version, constants.EXPORT_VERSION))
4741

    
4742
      # Check that the new instance doesn't have less disks than the export
4743
      instance_disks = len(self.disks)
4744
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
4745
      if instance_disks < export_disks:
4746
        raise errors.OpPrereqError("Not enough disks to import."
4747
                                   " (instance: %d, export: %d)" %
4748
                                   (instance_disks, export_disks))
4749

    
4750
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
4751
      disk_images = []
4752
      for idx in range(export_disks):
4753
        option = 'disk%d_dump' % idx
4754
        if export_info.has_option(constants.INISECT_INS, option):
4755
          # FIXME: are the old os-es, disk sizes, etc. useful?
4756
          export_name = export_info.get(constants.INISECT_INS, option)
4757
          image = os.path.join(src_path, export_name)
4758
          disk_images.append(image)
4759
        else:
4760
          disk_images.append(False)
4761

    
4762
      self.src_images = disk_images
4763

    
4764
      old_name = export_info.get(constants.INISECT_INS, 'name')
4765
      # FIXME: int() here could throw a ValueError on broken exports
4766
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
4767
      if self.op.instance_name == old_name:
4768
        for idx, nic in enumerate(self.nics):
4769
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
4770
            nic_mac_ini = 'nic%d_mac' % idx
4771
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
4772

    
4773
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
4774
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
4775
    if self.op.start and not self.op.ip_check:
4776
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
4777
                                 " adding an instance in start mode")
4778

    
4779
    if self.op.ip_check:
4780
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
4781
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
4782
                                   (self.check_ip, self.op.instance_name))
4783

    
4784
    #### mac address generation
4785
    # By generating here the mac address both the allocator and the hooks get
4786
    # the real final mac address rather than the 'auto' or 'generate' value.
4787
    # There is a race condition between the generation and the instance object
4788
    # creation, which means that we know the mac is valid now, but we're not
4789
    # sure it will be when we actually add the instance. If things go bad
4790
    # adding the instance will abort because of a duplicate mac, and the
4791
    # creation job will fail.
4792
    for nic in self.nics:
4793
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
4794
        nic.mac = self.cfg.GenerateMAC()
4795

    
4796
    #### allocator run
4797

    
4798
    if self.op.iallocator is not None:
4799
      self._RunAllocator()
4800

    
4801
    #### node related checks
4802

    
4803
    # check primary node
4804
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
4805
    assert self.pnode is not None, \
4806
      "Cannot retrieve locked node %s" % self.op.pnode
4807
    if pnode.offline:
4808
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
4809
                                 pnode.name)
4810
    if pnode.drained:
4811
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
4812
                                 pnode.name)
4813

    
4814
    self.secondaries = []
4815

    
4816
    # mirror node verification
4817
    if self.op.disk_template in constants.DTS_NET_MIRROR:
4818
      if self.op.snode is None:
4819
        raise errors.OpPrereqError("The networked disk templates need"
4820
                                   " a mirror node")
4821
      if self.op.snode == pnode.name:
4822
        raise errors.OpPrereqError("The secondary node cannot be"
4823
                                   " the primary node.")
4824
      _CheckNodeOnline(self, self.op.snode)
4825
      _CheckNodeNotDrained(self, self.op.snode)
4826
      self.secondaries.append(self.op.snode)
4827

    
4828
    nodenames = [pnode.name] + self.secondaries
4829

    
4830
    req_size = _ComputeDiskSize(self.op.disk_template,
4831
                                self.disks)
4832

    
4833
    # Check lv size requirements
4834
    if req_size is not None:
4835
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
4836
                                         self.op.hypervisor)
4837
      for node in nodenames:
4838
        info = nodeinfo[node]
4839
        info.Raise("Cannot get current information from node %s" % node)
4840
        info = info.payload
4841
        vg_free = info.get('vg_free', None)
4842
        if not isinstance(vg_free, int):
4843
          raise errors.OpPrereqError("Can't compute free disk space on"
4844
                                     " node %s" % node)
4845
        if req_size > vg_free:
4846
          raise errors.OpPrereqError("Not enough disk space on target node %s."
4847
                                     " %d MB available, %d MB required" %
4848
                                     (node, vg_free, req_size))
4849

    
4850
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
4851

    
4852
    # os verification
4853
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
4854
    result.Raise("OS '%s' not in supported os list for primary node %s" %
4855
                 (self.op.os_type, pnode.name), prereq=True)
4856

    
4857
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
4858

    
4859
    # memory check on primary node
4860
    if self.op.start:
4861
      _CheckNodeFreeMemory(self, self.pnode.name,
4862
                           "creating instance %s" % self.op.instance_name,
4863
                           self.be_full[constants.BE_MEMORY],
4864
                           self.op.hypervisor)
4865

    
4866
  def Exec(self, feedback_fn):
4867
    """Create and add the instance to the cluster.
4868

4869
    """
4870
    instance = self.op.instance_name
4871
    pnode_name = self.pnode.name
4872

    
4873
    ht_kind = self.op.hypervisor
4874
    if ht_kind in constants.HTS_REQ_PORT:
4875
      network_port = self.cfg.AllocatePort()
4876
    else:
4877
      network_port = None
4878

    
4879
    ##if self.op.vnc_bind_address is None:
4880
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
4881

    
4882
    # this is needed because os.path.join does not accept None arguments
4883
    if self.op.file_storage_dir is None:
4884
      string_file_storage_dir = ""
4885
    else:
4886
      string_file_storage_dir = self.op.file_storage_dir
4887

    
4888
    # build the full file storage dir path
4889
    file_storage_dir = os.path.normpath(os.path.join(
4890
                                        self.cfg.GetFileStorageDir(),
4891
                                        string_file_storage_dir, instance))
4892

    
4893

    
4894
    disks = _GenerateDiskTemplate(self,
4895
                                  self.op.disk_template,
4896
                                  instance, pnode_name,
4897
                                  self.secondaries,
4898
                                  self.disks,
4899
                                  file_storage_dir,
4900
                                  self.op.file_driver,
4901
                                  0)
4902

    
4903
    iobj = objects.Instance(name=instance, os=self.op.os_type,
4904
                            primary_node=pnode_name,
4905
                            nics=self.nics, disks=disks,
4906
                            disk_template=self.op.disk_template,
4907
                            admin_up=False,
4908
                            network_port=network_port,
4909
                            beparams=self.op.beparams,
4910
                            hvparams=self.op.hvparams,
4911
                            hypervisor=self.op.hypervisor,
4912
                            )
4913

    
4914
    feedback_fn("* creating instance disks...")
4915
    try:
4916
      _CreateDisks(self, iobj)
4917
    except errors.OpExecError:
4918
      self.LogWarning("Device creation failed, reverting...")
4919
      try:
4920
        _RemoveDisks(self, iobj)
4921
      finally:
4922
        self.cfg.ReleaseDRBDMinors(instance)
4923
        raise
4924

    
4925
    feedback_fn("adding instance %s to cluster config" % instance)
4926

    
4927
    self.cfg.AddInstance(iobj)
4928
    # Declare that we don't want to remove the instance lock anymore, as we've
4929
    # added the instance to the config
4930
    del self.remove_locks[locking.LEVEL_INSTANCE]
4931
    # Unlock all the nodes
4932
    if self.op.mode == constants.INSTANCE_IMPORT:
4933
      nodes_keep = [self.op.src_node]
4934
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
4935
                       if node != self.op.src_node]
4936
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
4937
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
4938
    else:
4939
      self.context.glm.release(locking.LEVEL_NODE)
4940
      del self.acquired_locks[locking.LEVEL_NODE]
4941

    
4942
    if self.op.wait_for_sync:
4943
      disk_abort = not _WaitForSync(self, iobj)
4944
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
4945
      # make sure the disks are not degraded (still sync-ing is ok)
4946
      time.sleep(15)
4947
      feedback_fn("* checking mirrors status")
4948
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
4949
    else:
4950
      disk_abort = False
4951

    
4952
    if disk_abort:
4953
      _RemoveDisks(self, iobj)
4954
      self.cfg.RemoveInstance(iobj.name)
4955
      # Make sure the instance lock gets removed
4956
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
4957
      raise errors.OpExecError("There are some degraded disks for"
4958
                               " this instance")
4959

    
4960
    feedback_fn("creating os for instance %s on node %s" %
4961
                (instance, pnode_name))
4962

    
4963
    if iobj.disk_template != constants.DT_DISKLESS:
4964
      if self.op.mode == constants.INSTANCE_CREATE:
4965
        feedback_fn("* running the instance OS create scripts...")
4966
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
4967
        result.Raise("Could not add os for instance %s"
4968
                     " on node %s" % (instance, pnode_name))
4969

    
4970
      elif self.op.mode == constants.INSTANCE_IMPORT:
4971
        feedback_fn("* running the instance OS import scripts...")
4972
        src_node = self.op.src_node
4973
        src_images = self.src_images
4974
        cluster_name = self.cfg.GetClusterName()
4975
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
4976
                                                         src_node, src_images,
4977
                                                         cluster_name)
4978
        msg = import_result.fail_msg
4979
        if msg:
4980
          self.LogWarning("Error while importing the disk images for instance"
4981
                          " %s on node %s: %s" % (instance, pnode_name, msg))
4982
      else:
4983
        # also checked in the prereq part
4984
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
4985
                                     % self.op.mode)
4986

    
4987
    if self.op.start:
4988
      iobj.admin_up = True
4989
      self.cfg.Update(iobj)
4990
      logging.info("Starting instance %s on node %s", instance, pnode_name)
4991
      feedback_fn("* starting instance...")
4992
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
4993
      result.Raise("Could not start instance")
4994

    
4995

    
4996
class LUConnectConsole(NoHooksLU):
  """Connect to an instance's console.

  This is somewhat special in that it returns the command line that
  you need to run on the master node in order to connect to the
  console.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Connect to the console of an instance

    """
    instance = self.instance
    node = instance.primary_node

    node_insts = self.rpc.call_instance_list([node],
                                             [instance.hypervisor])[node]
    node_insts.Raise("Can't get node information from %s" % node)

    if instance.name not in node_insts.payload:
      raise errors.OpExecError("Instance %s is not running." % instance.name)

    logging.debug("Connecting to console of %s on %s", instance.name, node)

    hyper = hypervisor.GetHypervisor(instance.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    hvparams = cluster.FillHV(instance)
    beparams = cluster.FillBE(instance)
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)

    # build ssh cmdline
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
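
  # Illustrative note: BuildCmd returns an argv-style list, so a caller (for
  # example the command-line client) could hand the terminal over to ssh
  # roughly like this (sketch only; "cmd" stands for the value returned by
  # Exec above):
  #
  #   os.execvp(cmd[0], cmd)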


class LUReplaceDisks(LogicalUnit):
  """Replace the disks of an instance.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "mode", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, "remote_node"):
      self.op.remote_node = None
    if not hasattr(self.op, "iallocator"):
      self.op.iallocator = None

    # check for valid parameter combination
    cnt = [self.op.remote_node, self.op.iallocator].count(None)
    if self.op.mode == constants.REPLACE_DISK_CHG:
      if cnt == 2:
        raise errors.OpPrereqError("When changing the secondary either an"
                                   " iallocator script must be used or the"
                                   " new node given")
      elif cnt == 0:
        raise errors.OpPrereqError("Give either the iallocator or the new"
                                   " secondary, not both")
    else: # not replacing the secondary
      if cnt != 2:
        raise errors.OpPrereqError("The iallocator and new node options can"
                                   " be used only when changing the"
                                   " secondary node")
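  # Summary of the parameter combinations accepted by CheckArguments above:
  #
  #   mode              remote_node  iallocator  result
  #   REPLACE_DISK_CHG  given        -           ok, use the given node
  #   REPLACE_DISK_CHG  -            given       ok, the allocator picks a node
  #   REPLACE_DISK_CHG  -            -           error, one of the two is needed
  #   REPLACE_DISK_CHG  given        given       error, not both at once
  #   PRI/SEC modes     -            -           ok, secondary stays unchanged
  #   PRI/SEC modes     either one given         error, only valid when
  #                                              changing the secondary
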
  def ExpandNames(self):
    self._ExpandAndLockInstance()

    if self.op.iallocator is not None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    elif self.op.remote_node is not None:
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
      if remote_node is None:
        raise errors.OpPrereqError("Node '%s' not known" %
                                   self.op.remote_node)
      self.op.remote_node = remote_node
      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    # If we're not already locking all nodes in the set we have to declare the
    # instance's primary/secondary nodes.
    if (level == locking.LEVEL_NODE and
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
      self._LockInstancesNodes()

  def _RunAllocator(self):
    """Compute a new secondary node using an IAllocator.

    """
    ial = IAllocator(self,
                     mode=constants.IALLOCATOR_MODE_RELOC,
                     name=self.op.instance_name,
                     relocate_from=[self.sec_node])

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.remote_node = ial.nodes[0]
    self.LogInfo("Selected new secondary for the instance: %s",
                 self.op.remote_node)

  def BuildHooksEnv(self):
5132
    """Build hooks env.
5133

5134
    This runs on the master, the primary and all the secondaries.
5135

5136
    """
5137
    env = {
5138
      "MODE": self.op.mode,
5139
      "NEW_SECONDARY": self.op.remote_node,
5140
      "OLD_SECONDARY": self.instance.secondary_nodes[0],
5141
      }
5142
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5143
    nl = [
5144
      self.cfg.GetMasterNode(),
5145
      self.instance.primary_node,
5146
      ]
5147
    if self.op.remote_node is not None:
5148
      nl.append(self.op.remote_node)
5149
    return env, nl, nl
5150

    
5151
  def CheckPrereq(self):
5152
    """Check prerequisites.
5153

5154
    This checks that the instance is in the cluster.
5155

5156
    """
5157
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5158
    assert instance is not None, \
5159
      "Cannot retrieve locked instance %s" % self.op.instance_name
5160
    self.instance = instance
5161

    
5162
    if instance.disk_template != constants.DT_DRBD8:
5163
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5164
                                 " instances")
5165

    
5166
    if len(instance.secondary_nodes) != 1:
5167
      raise errors.OpPrereqError("The instance has a strange layout,"
5168
                                 " expected one secondary but found %d" %
5169
                                 len(instance.secondary_nodes))
5170

    
5171
    self.sec_node = instance.secondary_nodes[0]
5172

    
5173
    if self.op.iallocator is not None:
5174
      self._RunAllocator()
5175

    
5176
    remote_node = self.op.remote_node
5177
    if remote_node is not None:
5178
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5179
      assert self.remote_node_info is not None, \
5180
        "Cannot retrieve locked node %s" % remote_node
5181
    else:
5182
      self.remote_node_info = None
5183
    if remote_node == instance.primary_node:
5184
      raise errors.OpPrereqError("The specified node is the primary node of"
5185
                                 " the instance.")
5186
    elif remote_node == self.sec_node:
5187
      raise errors.OpPrereqError("The specified node is already the"
5188
                                 " secondary node of the instance.")
5189

    
5190
    if self.op.mode == constants.REPLACE_DISK_PRI:
5191
      n1 = self.tgt_node = instance.primary_node
5192
      n2 = self.oth_node = self.sec_node
5193
    elif self.op.mode == constants.REPLACE_DISK_SEC:
5194
      n1 = self.tgt_node = self.sec_node
5195
      n2 = self.oth_node = instance.primary_node
5196
    elif self.op.mode == constants.REPLACE_DISK_CHG:
5197
      n1 = self.new_node = remote_node
5198
      n2 = self.oth_node = instance.primary_node
5199
      self.tgt_node = self.sec_node
5200
      _CheckNodeNotDrained(self, remote_node)
5201
    else:
5202
      raise errors.ProgrammerError("Unhandled disk replace mode")
5203

    
5204
    _CheckNodeOnline(self, n1)
5205
    _CheckNodeOnline(self, n2)
5206

    
5207
    if not self.op.disks:
5208
      self.op.disks = range(len(instance.disks))
5209

    
5210
    for disk_idx in self.op.disks:
5211
      instance.FindDisk(disk_idx)
5212

    
5213
  def _ExecD8DiskOnly(self, feedback_fn):
5214
    """Replace a disk on the primary or secondary for dbrd8.
5215

5216
    The algorithm for replace is quite complicated:
5217

5218
      1. for each disk to be replaced:
5219

5220
        1. create new LVs on the target node with unique names
5221
        1. detach old LVs from the drbd device
5222
        1. rename old LVs to name_replaced.<time_t>
5223
        1. rename new LVs to old LVs
5224
        1. attach the new LVs (with the old names now) to the drbd device
5225

5226
      1. wait for sync across all devices
5227

5228
      1. for each modified disk:
5229

5230
        1. remove old LVs (which have the name name_replaces.<time_t>)
5231

5232
    Failures are not very well handled.
5233

5234
    """
5235
    steps_total = 6
5236
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5237
    instance = self.instance
5238
    iv_names = {}
5239
    vgname = self.cfg.GetVGName()
5240
    # start of work
5241
    cfg = self.cfg
5242
    tgt_node = self.tgt_node
5243
    oth_node = self.oth_node
5244

    
5245
    # Step: check device activation
5246
    self.proc.LogStep(1, steps_total, "check device existence")
5247
    info("checking volume groups")
5248
    my_vg = cfg.GetVGName()
5249
    results = self.rpc.call_vg_list([oth_node, tgt_node])
5250
    if not results:
5251
      raise errors.OpExecError("Can't list volume groups on the nodes")
5252
    for node in oth_node, tgt_node:
5253
      res = results[node]
5254
      res.Raise("Error checking node %s" % node)
5255
      if my_vg not in res.payload:
5256
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5257
                                 (my_vg, node))
5258
    for idx, dev in enumerate(instance.disks):
5259
      if idx not in self.op.disks:
5260
        continue
5261
      for node in tgt_node, oth_node:
5262
        info("checking disk/%d on %s" % (idx, node))
5263
        cfg.SetDiskID(dev, node)
5264
        result = self.rpc.call_blockdev_find(node, dev)
5265
        msg = result.fail_msg
5266
        if not msg and not result.payload:
5267
          msg = "disk not found"
5268
        if msg:
5269
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5270
                                   (idx, node, msg))
5271

    
5272
    # Step: check other node consistency
5273
    self.proc.LogStep(2, steps_total, "check peer consistency")
5274
    for idx, dev in enumerate(instance.disks):
5275
      if idx not in self.op.disks:
5276
        continue
5277
      info("checking disk/%d consistency on %s" % (idx, oth_node))
5278
      if not _CheckDiskConsistency(self, dev, oth_node,
5279
                                   oth_node==instance.primary_node):
5280
        raise errors.OpExecError("Peer node (%s) has degraded storage, unsafe"
5281
                                 " to replace disks on this node (%s)" %
5282
                                 (oth_node, tgt_node))
5283

    
5284
    # Step: create new storage
5285
    self.proc.LogStep(3, steps_total, "allocate new storage")
5286
    for idx, dev in enumerate(instance.disks):
5287
      if idx not in self.op.disks:
5288
        continue
5289
      size = dev.size
5290
      cfg.SetDiskID(dev, tgt_node)
5291
      lv_names = [".disk%d_%s" % (idx, suf)
5292
                  for suf in ["data", "meta"]]
5293
      names = _GenerateUniqueNames(self, lv_names)
5294
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5295
                             logical_id=(vgname, names[0]))
5296
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5297
                             logical_id=(vgname, names[1]))
5298
      new_lvs = [lv_data, lv_meta]
5299
      old_lvs = dev.children
5300
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
5301
      info("creating new local storage on %s for %s" %
5302
           (tgt_node, dev.iv_name))
5303
      # we pass force_create=True to force the LVM creation
5304
      for new_lv in new_lvs:
5305
        _CreateBlockDev(self, tgt_node, instance, new_lv, True,
5306
                        _GetInstanceInfoText(instance), False)
5307

    
5308
    # Step: for each lv, detach+rename*2+attach
5309
    self.proc.LogStep(4, steps_total, "change drbd configuration")
5310
    for dev, old_lvs, new_lvs in iv_names.itervalues():
5311
      info("detaching %s drbd from local storage" % dev.iv_name)
5312
      result = self.rpc.call_blockdev_removechildren(tgt_node, dev, old_lvs)
5313
      result.Raise("Can't detach drbd from local storage on node"
5314
                   " %s for device %s" % (tgt_node, dev.iv_name))
5315
      #dev.children = []
5316
      #cfg.Update(instance)
5317

    
5318
      # ok, we created the new LVs, so now we know we have the needed
5319
      # storage; as such, we proceed on the target node to rename
5320
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
5321
      # using the assumption that logical_id == physical_id (which in
5322
      # turn is the unique_id on that node)
5323

    
5324
      # FIXME(iustin): use a better name for the replaced LVs
5325
      temp_suffix = int(time.time())
5326
      ren_fn = lambda d, suff: (d.physical_id[0],
5327
                                d.physical_id[1] + "_replaced-%s" % suff)
5328
      # build the rename list based on what LVs exist on the node
5329
      rlist = []
5330
      for to_ren in old_lvs:
5331
        result = self.rpc.call_blockdev_find(tgt_node, to_ren)
5332
        if not result.fail_msg and result.payload:
5333
          # device exists
5334
          rlist.append((to_ren, ren_fn(to_ren, temp_suffix)))
5335

    
5336
      info("renaming the old LVs on the target node")
5337
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5338
      result.Raise("Can't rename old LVs on node %s" % tgt_node)
5339
      # now we rename the new LVs to the old LVs
5340
      info("renaming the new LVs on the target node")
5341
      rlist = [(new, old.physical_id) for old, new in zip(old_lvs, new_lvs)]
5342
      result = self.rpc.call_blockdev_rename(tgt_node, rlist)
5343
      result.Raise("Can't rename new LVs on node %s" % tgt_node)
5344

    
5345
      for old, new in zip(old_lvs, new_lvs):
5346
        new.logical_id = old.logical_id
5347
        cfg.SetDiskID(new, tgt_node)
5348

    
5349
      for disk in old_lvs:
5350
        disk.logical_id = ren_fn(disk, temp_suffix)
5351
        cfg.SetDiskID(disk, tgt_node)
5352

    
5353
      # now that the new lvs have the old name, we can add them to the device
5354
      info("adding new mirror component on %s" % tgt_node)
5355
      result = self.rpc.call_blockdev_addchildren(tgt_node, dev, new_lvs)
5356
      msg = result.fail_msg
5357
      if msg:
5358
        for new_lv in new_lvs:
5359
          msg2 = self.rpc.call_blockdev_remove(tgt_node, new_lv).fail_msg
5360
          if msg2:
5361
            warning("Can't rollback device %s: %s", dev, msg2,
5362
                    hint="cleanup manually the unused logical volumes")
5363
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
5364

    
5365
      dev.children = new_lvs
5366
      cfg.Update(instance)
5367

    
5368
    # Step: wait for sync
5369

    
5370
    # this can fail as the old devices are degraded and _WaitForSync
5371
    # does a combined result over all disks, so we don't check its
5372
    # return value
5373
    self.proc.LogStep(5, steps_total, "sync devices")
5374
    _WaitForSync(self, instance, unlock=True)
5375

    
5376
    # so check manually all the devices
5377
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5378
      cfg.SetDiskID(dev, instance.primary_node)
5379
      result = self.rpc.call_blockdev_find(instance.primary_node, dev)
5380
      msg = result.fail_msg
5381
      if not msg and not result.payload:
5382
        msg = "disk not found"
5383
      if msg:
5384
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
5385
                                 (name, msg))
5386
      if result.payload[5]:
5387
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
5388

    
5389
    # Step: remove old storage
5390
    self.proc.LogStep(6, steps_total, "removing old storage")
5391
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
5392
      info("remove logical volumes for %s" % name)
5393
      for lv in old_lvs:
5394
        cfg.SetDiskID(lv, tgt_node)
5395
        msg = self.rpc.call_blockdev_remove(tgt_node, lv).fail_msg
5396
        if msg:
5397
          warning("Can't remove old LV: %s" % msg,
5398
                  hint="manually remove unused LVs")
5399
          continue
5400

    
5401
  def _ExecD8Secondary(self, feedback_fn):
5402
    """Replace the secondary node for drbd8.
5403

5404
    The algorithm for replace is quite complicated:
5405
      - for all disks of the instance:
5406
        - create new LVs on the new node with same names
5407
        - shutdown the drbd device on the old secondary
5408
        - disconnect the drbd network on the primary
5409
        - create the drbd device on the new secondary
5410
        - network attach the drbd on the primary, using an artifice:
5411
          the drbd code for Attach() will connect to the network if it
5412
          finds a device which is connected to the good local disks but
5413
          not network enabled
5414
      - wait for sync across all devices
5415
      - remove all disks from the old secondary
5416

5417
    Failures are not very well handled.
5418

5419
    """
5420
    steps_total = 6
5421
    warning, info = (self.proc.LogWarning, self.proc.LogInfo)
5422
    instance = self.instance
5423
    iv_names = {}
5424
    # start of work
5425
    cfg = self.cfg
5426
    old_node = self.tgt_node
5427
    new_node = self.new_node
5428
    pri_node = instance.primary_node
5429
    nodes_ip = {
5430
      old_node: self.cfg.GetNodeInfo(old_node).secondary_ip,
5431
      new_node: self.cfg.GetNodeInfo(new_node).secondary_ip,
5432
      pri_node: self.cfg.GetNodeInfo(pri_node).secondary_ip,
5433
      }
5434

    
5435
    # Step: check device activation
5436
    self.proc.LogStep(1, steps_total, "check device existence")
5437
    info("checking volume groups")
5438
    my_vg = cfg.GetVGName()
5439
    results = self.rpc.call_vg_list([pri_node, new_node])
5440
    for node in pri_node, new_node:
5441
      res = results[node]
5442
      res.Raise("Error checking node %s" % node)
5443
      if my_vg not in res.payload:
5444
        raise errors.OpExecError("Volume group '%s' not found on %s" %
5445
                                 (my_vg, node))
5446
    for idx, dev in enumerate(instance.disks):
5447
      if idx not in self.op.disks:
5448
        continue
5449
      info("checking disk/%d on %s" % (idx, pri_node))
5450
      cfg.SetDiskID(dev, pri_node)
5451
      result = self.rpc.call_blockdev_find(pri_node, dev)
5452
      msg = result.fail_msg
5453
      if not msg and not result.payload:
5454
        msg = "disk not found"
5455
      if msg:
5456
        raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
5457
                                 (idx, pri_node, msg))
5458

    
5459
    # Step: check other node consistency
5460
    self.proc.LogStep(2, steps_total, "check peer consistency")
5461
    for idx, dev in enumerate(instance.disks):
5462
      if idx not in self.op.disks:
5463
        continue
5464
      info("checking disk/%d consistency on %s" % (idx, pri_node))
5465
      if not _CheckDiskConsistency(self, dev, pri_node, True, ldisk=True):
5466
        raise errors.OpExecError("Primary node (%s) has degraded storage,"
5467
                                 " unsafe to replace the secondary" %
5468
                                 pri_node)
5469

    
5470
    # Step: create new storage
5471
    self.proc.LogStep(3, steps_total, "allocate new storage")
5472
    for idx, dev in enumerate(instance.disks):
5473
      info("adding new local storage on %s for disk/%d" %
5474
           (new_node, idx))
5475
      # we pass force_create=True to force LVM creation
5476
      for new_lv in dev.children:
5477
        _CreateBlockDev(self, new_node, instance, new_lv, True,
5478
                        _GetInstanceInfoText(instance), False)
5479

    
5480
    # Step 4: dbrd minors and drbd setups changes
5481
    # after this, we must manually remove the drbd minors on both the
5482
    # error and the success paths
5483
    minors = cfg.AllocateDRBDMinor([new_node for dev in instance.disks],
5484
                                   instance.name)
5485
    logging.debug("Allocated minors %s" % (minors,))
5486
    self.proc.LogStep(4, steps_total, "changing drbd configuration")
5487
    for idx, (dev, new_minor) in enumerate(zip(instance.disks, minors)):
5488
      size = dev.size
5489
      info("activating a new drbd on %s for disk/%d" % (new_node, idx))
5490
      # create new devices on new_node; note that we create two IDs:
5491
      # one without port, so the drbd will be activated without
5492
      # networking information on the new node at this stage, and one
5493
      # with network, for the latter activation in step 4
5494
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
5495
      if pri_node == o_node1:
5496
        p_minor = o_minor1
5497
      else:
5498
        p_minor = o_minor2
5499

    
5500
      new_alone_id = (pri_node, new_node, None, p_minor, new_minor, o_secret)
5501
      new_net_id = (pri_node, new_node, o_port, p_minor, new_minor, o_secret)
5502

    
5503
      iv_names[idx] = (dev, dev.children, new_net_id)
5504
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
5505
                    new_net_id)
5506
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
5507
                              logical_id=new_alone_id,
5508
                              children=dev.children,
5509
                              size=dev.size)
5510
      try:
5511
        _CreateSingleBlockDev(self, new_node, instance, new_drbd,
5512
                              _GetInstanceInfoText(instance), False)
5513
      except errors.GenericError:
5514
        self.cfg.ReleaseDRBDMinors(instance.name)
5515
        raise
5516

    
5517
    for idx, dev in enumerate(instance.disks):
5518
      # we have new devices, shutdown the drbd on the old secondary
5519
      info("shutting down drbd for disk/%d on old node" % idx)
5520
      cfg.SetDiskID(dev, old_node)
5521
      msg = self.rpc.call_blockdev_shutdown(old_node, dev).fail_msg
5522
      if msg:
5523
        warning("Failed to shutdown drbd for disk/%d on old node: %s" %
5524
                (idx, msg),
5525
                hint="Please cleanup this device manually as soon as possible")
5526

    
5527
    info("detaching primary drbds from the network (=> standalone)")
5528
    result = self.rpc.call_drbd_disconnect_net([pri_node], nodes_ip,
5529
                                               instance.disks)[pri_node]
5530

    
5531
    msg = result.fail_msg
5532
    if msg:
5533
      # detaches didn't succeed (unlikely)
5534
      self.cfg.ReleaseDRBDMinors(instance.name)
5535
      raise errors.OpExecError("Can't detach the disks from the network on"
5536
                               " old node: %s" % (msg,))
5537

    
5538
    # if we managed to detach at least one, we update all the disks of
5539
    # the instance to point to the new secondary
5540
    info("updating instance configuration")
5541
    for dev, _, new_logical_id in iv_names.itervalues():
5542
      dev.logical_id = new_logical_id
5543
      cfg.SetDiskID(dev, pri_node)
5544
    cfg.Update(instance)
5545

    
5546
    # and now perform the drbd attach
5547
    info("attaching primary drbds to new secondary (standalone => connected)")
5548
    result = self.rpc.call_drbd_attach_net([pri_node, new_node], nodes_ip,
5549
                                           instance.disks, instance.name,
5550
                                           False)
5551
    for to_node, to_result in result.items():
5552
      msg = to_result.fail_msg
5553
      if msg:
5554
        warning("can't attach drbd disks on node %s: %s", to_node, msg,
5555
                hint="please do a gnt-instance info to see the"
5556
                " status of disks")
5557

    
5558
    # this can fail as the old devices are degraded and _WaitForSync
5559
    # does a combined result over all disks, so we don't check its
5560
    # return value
5561
    self.proc.LogStep(5, steps_total, "sync devices")
5562
    _WaitForSync(self, instance, unlock=True)
5563

    
5564
    # so check all the devices manually
5565
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5566
      cfg.SetDiskID(dev, pri_node)
5567
      result = self.rpc.call_blockdev_find(pri_node, dev)
5568
      msg = result.fail_msg
5569
      if not msg and not result.payload:
5570
        msg = "disk not found"
5571
      if msg:
5572
        raise errors.OpExecError("Can't find DRBD device disk/%d: %s" %
5573
                                 (idx, msg))
5574
      if result.payload[5]:
5575
        raise errors.OpExecError("DRBD device disk/%d is degraded!" % idx)
5576

    
5577
    self.proc.LogStep(6, steps_total, "removing old storage")
5578
    for idx, (dev, old_lvs, _) in iv_names.iteritems():
5579
      info("remove logical volumes for disk/%d" % idx)
5580
      for lv in old_lvs:
5581
        cfg.SetDiskID(lv, old_node)
5582
        msg = self.rpc.call_blockdev_remove(old_node, lv).fail_msg
5583
        if msg:
5584
          warning("Can't remove LV on old secondary: %s", msg,
5585
                  hint="Cleanup stale volumes by hand")
5586

    
5587
  def Exec(self, feedback_fn):
5588
    """Execute disk replacement.
5589

5590
    This dispatches the disk replacement to the appropriate handler.
5591

5592
    """
5593
    instance = self.instance
5594

    
5595
    # Activate the instance disks if we're replacing them on a down instance
5596
    if not instance.admin_up:
5597
      _StartInstanceDisks(self, instance, True)
5598

    
5599
    if self.op.mode == constants.REPLACE_DISK_CHG:
5600
      fn = self._ExecD8Secondary
5601
    else:
5602
      fn = self._ExecD8DiskOnly
5603

    
5604
    ret = fn(feedback_fn)
5605

    
5606
    # Deactivate the instance disks if we're replacing them on a down instance
5607
    if not instance.admin_up:
5608
      _SafeShutdownInstanceDisks(self, instance)
5609

    
5610
    return ret
5611

    
5612

    
5613
class LUGrowDisk(LogicalUnit):
5614
  """Grow a disk of an instance.
5615

5616
  """
5617
  HPATH = "disk-grow"
5618
  HTYPE = constants.HTYPE_INSTANCE
5619
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
5620
  REQ_BGL = False
5621

    
5622
  def ExpandNames(self):
5623
    self._ExpandAndLockInstance()
5624
    self.needed_locks[locking.LEVEL_NODE] = []
5625
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5626

    
5627
  def DeclareLocks(self, level):
5628
    if level == locking.LEVEL_NODE:
5629
      self._LockInstancesNodes()
5630

    
5631
  def BuildHooksEnv(self):
5632
    """Build hooks env.
5633

5634
    This runs on the master, the primary and all the secondaries.
5635

5636
    """
5637
    env = {
5638
      "DISK": self.op.disk,
5639
      "AMOUNT": self.op.amount,
5640
      }
5641
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5642
    nl = [
5643
      self.cfg.GetMasterNode(),
5644
      self.instance.primary_node,
5645
      ]
5646
    return env, nl, nl
5647

    
5648
  def CheckPrereq(self):
5649
    """Check prerequisites.
5650

5651
    This checks that the instance is in the cluster.
5652

5653
    """
5654
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5655
    assert instance is not None, \
5656
      "Cannot retrieve locked instance %s" % self.op.instance_name
5657
    nodenames = list(instance.all_nodes)
5658
    for node in nodenames:
5659
      _CheckNodeOnline(self, node)
5660

    
5661

    
5662
    self.instance = instance
5663

    
5664
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
5665
      raise errors.OpPrereqError("Instance's disk layout does not support"
5666
                                 " growing.")
5667

    
5668
    self.disk = instance.FindDisk(self.op.disk)
5669

    
5670
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5671
                                       instance.hypervisor)
5672
    for node in nodenames:
5673
      info = nodeinfo[node]
5674
      info.Raise("Cannot get current information from node %s" % node)
5675
      vg_free = info.payload.get('vg_free', None)
5676
      if not isinstance(vg_free, int):
5677
        raise errors.OpPrereqError("Can't compute free disk space on"
5678
                                   " node %s" % node)
5679
      if self.op.amount > vg_free:
5680
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
5681
                                   " %d MiB available, %d MiB required" %
5682
                                   (node, vg_free, self.op.amount))
5683

    
5684
  def Exec(self, feedback_fn):
5685
    """Execute disk grow.
5686

5687
    """
5688
    instance = self.instance
5689
    disk = self.disk
5690
    for node in instance.all_nodes:
5691
      self.cfg.SetDiskID(disk, node)
5692
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
5693
      result.Raise("Grow request failed to node %s" % node)
5694
    disk.RecordGrow(self.op.amount)
5695
    self.cfg.Update(instance)
5696
    if self.op.wait_for_sync:
5697
      disk_abort = not _WaitForSync(self, instance)
5698
      if disk_abort:
5699
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
5700
                             " status.\nPlease check the instance.")
5701
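  # Illustrative sketch, not part of the original code: a grow request handled
  # by this LU carries the fields listed in _OP_REQP, along the lines of
  # (opcode class name and values assumed for illustration only):
  #
  #   op = opcodes.OpGrowDisk(instance_name="inst1.example.com", disk=0,
  #                           amount=2048, wait_for_sync=True)
  #
  # where amount is in MiB, matching the vg_free check in CheckPrereq above.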

    
5702

    
5703
class LUQueryInstanceData(NoHooksLU):
5704
  """Query runtime instance data.
5705

5706
  """
5707
  _OP_REQP = ["instances", "static"]
5708
  REQ_BGL = False
5709

    
5710
  def ExpandNames(self):
5711
    self.needed_locks = {}
5712
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
5713

    
5714
    if not isinstance(self.op.instances, list):
5715
      raise errors.OpPrereqError("Invalid argument type 'instances'")
5716

    
5717
    if self.op.instances:
5718
      self.wanted_names = []
5719
      for name in self.op.instances:
5720
        full_name = self.cfg.ExpandInstanceName(name)
5721
        if full_name is None:
5722
          raise errors.OpPrereqError("Instance '%s' not known" % name)
5723
        self.wanted_names.append(full_name)
5724
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
5725
    else:
5726
      self.wanted_names = None
5727
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5728

    
5729
    self.needed_locks[locking.LEVEL_NODE] = []
5730
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5731

    
5732
  def DeclareLocks(self, level):
5733
    if level == locking.LEVEL_NODE:
5734
      self._LockInstancesNodes()
5735

    
5736
  def CheckPrereq(self):
5737
    """Check prerequisites.
5738

5739
    This only checks the optional instance list against the existing names.
5740

5741
    """
5742
    if self.wanted_names is None:
5743
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5744

    
5745
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
5746
                             in self.wanted_names]
5747
    return
5748

    
5749
  def _ComputeDiskStatus(self, instance, snode, dev):
5750
    """Compute block device status.
5751

5752
    """
5753
    static = self.op.static
5754
    if not static:
5755
      self.cfg.SetDiskID(dev, instance.primary_node)
5756
      dev_pstatus = self.rpc.call_blockdev_find(instance.primary_node, dev)
5757
      if dev_pstatus.offline:
5758
        dev_pstatus = None
5759
      else:
5760
        dev_pstatus.Raise("Can't compute disk status for %s" % instance.name)
5761
        dev_pstatus = dev_pstatus.payload
5762
    else:
5763
      dev_pstatus = None
5764

    
5765
    if dev.dev_type in constants.LDS_DRBD:
5766
      # we change the snode then (otherwise we use the one passed in)
5767
      if dev.logical_id[0] == instance.primary_node:
5768
        snode = dev.logical_id[1]
5769
      else:
5770
        snode = dev.logical_id[0]
5771

    
5772
    if snode and not static:
5773
      self.cfg.SetDiskID(dev, snode)
5774
      dev_sstatus = self.rpc.call_blockdev_find(snode, dev)
5775
      if dev_sstatus.offline:
5776
        dev_sstatus = None
5777
      else:
5778
        dev_sstatus.Raise("Can't compute disk status for %s" % instance.name)
5779
        dev_sstatus = dev_sstatus.payload
5780
    else:
5781
      dev_sstatus = None
5782

    
5783
    if dev.children:
5784
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
5785
                      for child in dev.children]
5786
    else:
5787
      dev_children = []
5788

    
5789
    data = {
5790
      "iv_name": dev.iv_name,
5791
      "dev_type": dev.dev_type,
5792
      "logical_id": dev.logical_id,
5793
      "physical_id": dev.physical_id,
5794
      "pstatus": dev_pstatus,
5795
      "sstatus": dev_sstatus,
5796
      "children": dev_children,
5797
      "mode": dev.mode,
5798
      }
5799

    
5800
    return data
5801
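  # Illustrative sketch, not part of the original code: for a DRBD disk backed
  # by two local volumes the dict returned above nests one level, roughly
  # (values made up):
  #
  #   {"iv_name": "disk/0", "dev_type": constants.LD_DRBD8,
  #    "pstatus": (...), "sstatus": (...), "mode": "rw",
  #    "children": [{"iv_name": None, "dev_type": constants.LD_LV,
  #                  "children": [], ...}, ...]}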

    
5802
  def Exec(self, feedback_fn):
5803
    """Gather and return data"""
5804
    result = {}
5805

    
5806
    cluster = self.cfg.GetClusterInfo()
5807

    
5808
    for instance in self.wanted_instances:
5809
      if not self.op.static:
5810
        remote_info = self.rpc.call_instance_info(instance.primary_node,
5811
                                                  instance.name,
5812
                                                  instance.hypervisor)
5813
        remote_info.Raise("Error checking node %s" % instance.primary_node)
5814
        remote_info = remote_info.payload
5815
        if remote_info and "state" in remote_info:
5816
          remote_state = "up"
5817
        else:
5818
          remote_state = "down"
5819
      else:
5820
        remote_state = None
5821
      if instance.admin_up:
5822
        config_state = "up"
5823
      else:
5824
        config_state = "down"
5825

    
5826
      disks = [self._ComputeDiskStatus(instance, None, device)
5827
               for device in instance.disks]
5828

    
5829
      idict = {
5830
        "name": instance.name,
5831
        "config_state": config_state,
5832
        "run_state": remote_state,
5833
        "pnode": instance.primary_node,
5834
        "snodes": instance.secondary_nodes,
5835
        "os": instance.os,
5836
        # this happens to be the same format used for hooks
5837
        "nics": _NICListToTuple(self, instance.nics),
5838
        "disks": disks,
5839
        "hypervisor": instance.hypervisor,
5840
        "network_port": instance.network_port,
5841
        "hv_instance": instance.hvparams,
5842
        "hv_actual": cluster.FillHV(instance),
5843
        "be_instance": instance.beparams,
5844
        "be_actual": cluster.FillBE(instance),
5845
        }
5846

    
5847
      result[instance.name] = idict
5848

    
5849
    return result
5850

    
5851

    
5852
class LUSetInstanceParams(LogicalUnit):
5853
  """Modifies an instances's parameters.
5854

5855
  """
5856
  HPATH = "instance-modify"
5857
  HTYPE = constants.HTYPE_INSTANCE
5858
  _OP_REQP = ["instance_name"]
5859
  REQ_BGL = False
5860

    
5861
  def CheckArguments(self):
5862
    if not hasattr(self.op, 'nics'):
5863
      self.op.nics = []
5864
    if not hasattr(self.op, 'disks'):
5865
      self.op.disks = []
5866
    if not hasattr(self.op, 'beparams'):
5867
      self.op.beparams = {}
5868
    if not hasattr(self.op, 'hvparams'):
5869
      self.op.hvparams = {}
5870
    self.op.force = getattr(self.op, "force", False)
5871
    if not (self.op.nics or self.op.disks or
5872
            self.op.hvparams or self.op.beparams):
5873
      raise errors.OpPrereqError("No changes submitted")
5874

    
5875
    # Disk validation
5876
    disk_addremove = 0
5877
    for disk_op, disk_dict in self.op.disks:
5878
      if disk_op == constants.DDM_REMOVE:
5879
        disk_addremove += 1
5880
        continue
5881
      elif disk_op == constants.DDM_ADD:
5882
        disk_addremove += 1
5883
      else:
5884
        if not isinstance(disk_op, int):
5885
          raise errors.OpPrereqError("Invalid disk index")
5886
      if disk_op == constants.DDM_ADD:
5887
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
5888
        if mode not in constants.DISK_ACCESS_SET:
5889
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
5890
        size = disk_dict.get('size', None)
5891
        if size is None:
5892
          raise errors.OpPrereqError("Required disk parameter size missing")
5893
        try:
5894
          size = int(size)
5895
        except ValueError, err:
5896
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
5897
                                     str(err))
5898
        disk_dict['size'] = size
5899
      else:
5900
        # modification of disk
5901
        if 'size' in disk_dict:
5902
          raise errors.OpPrereqError("Disk size change not possible, use"
5903
                                     " grow-disk")
5904

    
5905
    if disk_addremove > 1:
5906
      raise errors.OpPrereqError("Only one disk add or remove operation"
5907
                                 " supported at a time")
5908

    
5909
    # NIC validation
5910
    nic_addremove = 0
5911
    for nic_op, nic_dict in self.op.nics:
5912
      if nic_op == constants.DDM_REMOVE:
5913
        nic_addremove += 1
5914
        continue
5915
      elif nic_op == constants.DDM_ADD:
5916
        nic_addremove += 1
5917
      else:
5918
        if not isinstance(nic_op, int):
5919
          raise errors.OpPrereqError("Invalid nic index")
5920

    
5921
      # nic_dict should be a dict
5922
      nic_ip = nic_dict.get('ip', None)
5923
      if nic_ip is not None:
5924
        if nic_ip.lower() == constants.VALUE_NONE:
5925
          nic_dict['ip'] = None
5926
        else:
5927
          if not utils.IsValidIP(nic_ip):
5928
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
5929

    
5930
      nic_bridge = nic_dict.get('bridge', None)
5931
      nic_link = nic_dict.get('link', None)
5932
      if nic_bridge and nic_link:
5933
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5934
                                   " at the same time")
5935
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
5936
        nic_dict['bridge'] = None
5937
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
5938
        nic_dict['link'] = None
5939

    
5940
      if nic_op == constants.DDM_ADD:
5941
        nic_mac = nic_dict.get('mac', None)
5942
        if nic_mac is None:
5943
          nic_dict['mac'] = constants.VALUE_AUTO
5944

    
5945
      if 'mac' in nic_dict:
5946
        nic_mac = nic_dict['mac']
5947
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5948
          if not utils.IsValidMac(nic_mac):
5949
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
5950
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
5951
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
5952
                                     " modifying an existing nic")
5953

    
5954
    if nic_addremove > 1:
5955
      raise errors.OpPrereqError("Only one NIC add or remove operation"
5956
                                 " supported at a time")
5957
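  # Illustrative sketch, not part of the original code: the opcode fields
  # validated above are lists of (operation, parameters) pairs, e.g. (values
  # made up):
  #
  #   op.disks = [(constants.DDM_ADD, {"size": 1024,
  #                                    "mode": constants.DISK_RDWR})]
  #   op.nics  = [(0, {"mac": "aa:00:00:11:22:33"}),
  #               (constants.DDM_REMOVE, {})]
  #
  # An integer index modifies an existing device, while DDM_ADD/DDM_REMOVE
  # append or drop one; at most one add or remove is accepted per submission.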

    
5958
  def ExpandNames(self):
5959
    self._ExpandAndLockInstance()
5960
    self.needed_locks[locking.LEVEL_NODE] = []
5961
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5962

    
5963
  def DeclareLocks(self, level):
5964
    if level == locking.LEVEL_NODE:
5965
      self._LockInstancesNodes()
5966

    
5967
  def BuildHooksEnv(self):
5968
    """Build hooks env.
5969

5970
    This runs on the master, primary and secondaries.
5971

5972
    """
5973
    args = dict()
5974
    if constants.BE_MEMORY in self.be_new:
5975
      args['memory'] = self.be_new[constants.BE_MEMORY]
5976
    if constants.BE_VCPUS in self.be_new:
5977
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
5978
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
5979
    # information at all.
5980
    if self.op.nics:
5981
      args['nics'] = []
5982
      nic_override = dict(self.op.nics)
5983
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
5984
      for idx, nic in enumerate(self.instance.nics):
5985
        if idx in nic_override:
5986
          this_nic_override = nic_override[idx]
5987
        else:
5988
          this_nic_override = {}
5989
        if 'ip' in this_nic_override:
5990
          ip = this_nic_override['ip']
5991
        else:
5992
          ip = nic.ip
5993
        if 'mac' in this_nic_override:
5994
          mac = this_nic_override['mac']
5995
        else:
5996
          mac = nic.mac
5997
        if idx in self.nic_pnew:
5998
          nicparams = self.nic_pnew[idx]
5999
        else:
6000
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6001
        mode = nicparams[constants.NIC_MODE]
6002
        link = nicparams[constants.NIC_LINK]
6003
        args['nics'].append((ip, mac, mode, link))
6004
      if constants.DDM_ADD in nic_override:
6005
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6006
        mac = nic_override[constants.DDM_ADD]['mac']
6007
        nicparams = self.nic_pnew[constants.DDM_ADD]
6008
        mode = nicparams[constants.NIC_MODE]
6009
        link = nicparams[constants.NIC_LINK]
6010
        args['nics'].append((ip, mac, mode, link))
6011
      elif constants.DDM_REMOVE in nic_override:
6012
        del args['nics'][-1]
6013

    
6014
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6015
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6016
    return env, nl, nl
6017

    
6018
  def _GetUpdatedParams(self, old_params, update_dict,
6019
                        default_values, parameter_types):
6020
    """Return the new params dict for the given params.
6021

6022
    @type old_params: dict
6023
    @param old_params: old parameters
6024
    @type update_dict: dict
6025
    @param update_dict: dict containing new parameter values,
6026
                        or constants.VALUE_DEFAULT to reset the
6027
                        parameter to its default value
6028
    @type default_values: dict
6029
    @param default_values: default values for the filled parameters
6030
    @type parameter_types: dict
6031
    @param parameter_types: dict mapping target dict keys to types
6032
                            in constants.ENFORCEABLE_TYPES
6033
    @rtype: (dict, dict)
6034
    @return: (new_parameters, filled_parameters)
6035

6036
    """
6037
    params_copy = copy.deepcopy(old_params)
6038
    for key, val in update_dict.iteritems():
6039
      if val == constants.VALUE_DEFAULT:
6040
        try:
6041
          del params_copy[key]
6042
        except KeyError:
6043
          pass
6044
      else:
6045
        params_copy[key] = val
6046
    utils.ForceDictType(params_copy, parameter_types)
6047
    params_filled = objects.FillDict(default_values, params_copy)
6048
    return (params_copy, params_filled)
6049
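  # Illustrative sketch, not part of the original code: how the helper above
  # behaves for a hypothetical parameter override (names and values made up).
  # Passing constants.VALUE_DEFAULT drops the key so the cluster-level default
  # shows through again in the filled dict:
  #
  #   old    = {"param_a": "instance-value", "param_b": True}
  #   update = {"param_a": constants.VALUE_DEFAULT, "param_c": 42}
  #   new, filled = self._GetUpdatedParams(old, update, defaults, types)
  #   # new    == {"param_b": True, "param_c": 42}
  #   # filled == defaults overridden by the entries in 'new'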

    
6050
  def CheckPrereq(self):
6051
    """Check prerequisites.
6052

6053
    This only checks the instance list against the existing names.
6054

6055
    """
6056
    force = self.force = self.op.force
6057

    
6058
    # checking the new params on the primary/secondary nodes
6059

    
6060
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6061
    cluster = self.cluster = self.cfg.GetClusterInfo()
6062
    assert self.instance is not None, \
6063
      "Cannot retrieve locked instance %s" % self.op.instance_name
6064
    pnode = instance.primary_node
6065
    nodelist = list(instance.all_nodes)
6066

    
6067
    # hvparams processing
6068
    if self.op.hvparams:
6069
      i_hvdict, hv_new = self._GetUpdatedParams(
6070
                             instance.hvparams, self.op.hvparams,
6071
                             cluster.hvparams[instance.hypervisor],
6072
                             constants.HVS_PARAMETER_TYPES)
6073
      # local check
6074
      hypervisor.GetHypervisor(
6075
        instance.hypervisor).CheckParameterSyntax(hv_new)
6076
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
6077
      self.hv_new = hv_new # the new actual values
6078
      self.hv_inst = i_hvdict # the new dict (without defaults)
6079
    else:
6080
      self.hv_new = self.hv_inst = {}
6081

    
6082
    # beparams processing
6083
    if self.op.beparams:
6084
      i_bedict, be_new = self._GetUpdatedParams(
6085
                             instance.beparams, self.op.beparams,
6086
                             cluster.beparams[constants.PP_DEFAULT],
6087
                             constants.BES_PARAMETER_TYPES)
6088
      self.be_new = be_new # the new actual values
6089
      self.be_inst = i_bedict # the new dict (without defaults)
6090
    else:
6091
      self.be_new = self.be_inst = {}
6092

    
6093
    self.warn = []
6094

    
6095
    if constants.BE_MEMORY in self.op.beparams and not self.force:
6096
      mem_check_list = [pnode]
6097
      if be_new[constants.BE_AUTO_BALANCE]:
6098
        # either we changed auto_balance to yes or it was from before
6099
        mem_check_list.extend(instance.secondary_nodes)
6100
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
6101
                                                  instance.hypervisor)
6102
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
6103
                                         instance.hypervisor)
6104
      pninfo = nodeinfo[pnode]
6105
      msg = pninfo.fail_msg
6106
      if msg:
6107
        # Assume the primary node is unreachable and go ahead
6108
        self.warn.append("Can't get info from primary node %s: %s" %
6109
                         (pnode, msg))
6110
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
6111
        self.warn.append("Node data from primary node %s doesn't contain"
6112
                         " free memory information" % pnode)
6113
      elif instance_info.fail_msg:
6114
        self.warn.append("Can't get instance runtime information: %s" %
6115
                        instance_info.fail_msg)
6116
      else:
6117
        if instance_info.payload:
6118
          current_mem = int(instance_info.payload['memory'])
6119
        else:
6120
          # Assume instance not running
6121
          # (there is a slight race condition here, but it's not very probable,
6122
          # and we have no other way to check)
6123
          current_mem = 0
6124
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
6125
                    pninfo.payload['memory_free'])
6126
        if miss_mem > 0:
6127
          raise errors.OpPrereqError("This change will prevent the instance"
6128
                                     " from starting, due to %d MB of memory"
6129
                                     " missing on its primary node" % miss_mem)
6130

    
6131
      if be_new[constants.BE_AUTO_BALANCE]:
6132
        for node, nres in nodeinfo.items():
6133
          if node not in instance.secondary_nodes:
6134
            continue
6135
          msg = nres.fail_msg
6136
          if msg:
6137
            self.warn.append("Can't get info from secondary node %s: %s" %
6138
                             (node, msg))
6139
          elif not isinstance(nres.payload.get('memory_free', None), int):
6140
            self.warn.append("Secondary node %s didn't return free"
6141
                             " memory information" % node)
6142
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
6143
            self.warn.append("Not enough memory to failover instance to"
6144
                             " secondary node %s" % node)
6145
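    # Worked example, not part of the original code (numbers made up): raising
    # BE_MEMORY to 2048 MiB while the instance currently uses 512 MiB and the
    # primary node reports 1024 MiB free gives
    #   miss_mem = 2048 - 512 - 1024 = 512 > 0
    # so the check above refuses the change unless the force flag is set.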

    
6146
    # NIC processing
6147
    self.nic_pnew = {}
6148
    self.nic_pinst = {}
6149
    for nic_op, nic_dict in self.op.nics:
6150
      if nic_op == constants.DDM_REMOVE:
6151
        if not instance.nics:
6152
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
6153
        continue
6154
      if nic_op != constants.DDM_ADD:
6155
        # an existing nic
6156
        if nic_op < 0 or nic_op >= len(instance.nics):
6157
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
6158
                                     " are 0 to %d" %
6159
                                     (nic_op, len(instance.nics)))
6160
        old_nic_params = instance.nics[nic_op].nicparams
6161
        old_nic_ip = instance.nics[nic_op].ip
6162
      else:
6163
        old_nic_params = {}
6164
        old_nic_ip = None
6165

    
6166
      update_params_dict = dict([(key, nic_dict[key])
6167
                                 for key in constants.NICS_PARAMETERS
6168
                                 if key in nic_dict])
6169

    
6170
      if 'bridge' in nic_dict:
6171
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
6172

    
6173
      new_nic_params, new_filled_nic_params = \
6174
          self._GetUpdatedParams(old_nic_params, update_params_dict,
6175
                                 cluster.nicparams[constants.PP_DEFAULT],
6176
                                 constants.NICS_PARAMETER_TYPES)
6177
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
6178
      self.nic_pinst[nic_op] = new_nic_params
6179
      self.nic_pnew[nic_op] = new_filled_nic_params
6180
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
6181

    
6182
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
6183
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
6184
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
6185
        if msg:
6186
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
6187
          if self.force:
6188
            self.warn.append(msg)
6189
          else:
6190
            raise errors.OpPrereqError(msg)
6191
      if new_nic_mode == constants.NIC_MODE_ROUTED:
6192
        if 'ip' in nic_dict:
6193
          nic_ip = nic_dict['ip']
6194
        else:
6195
          nic_ip = old_nic_ip
6196
        if nic_ip is None:
6197
          raise errors.OpPrereqError('Cannot set the nic ip to None'
6198
                                     ' on a routed nic')
6199
      if 'mac' in nic_dict:
6200
        nic_mac = nic_dict['mac']
6201
        if nic_mac is None:
6202
          raise errors.OpPrereqError('Cannot set the nic mac to None')
6203
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6204
          # otherwise generate the mac
6205
          nic_dict['mac'] = self.cfg.GenerateMAC()
6206
        else:
6207
          # or validate/reserve the current one
6208
          if self.cfg.IsMacInUse(nic_mac):
6209
            raise errors.OpPrereqError("MAC address %s already in use"
6210
                                       " in cluster" % nic_mac)
6211

    
6212
    # DISK processing
6213
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
6214
      raise errors.OpPrereqError("Disk operations not supported for"
6215
                                 " diskless instances")
6216
    for disk_op, disk_dict in self.op.disks:
6217
      if disk_op == constants.DDM_REMOVE:
6218
        if len(instance.disks) == 1:
6219
          raise errors.OpPrereqError("Cannot remove the last disk of"
6220
                                     " an instance")
6221
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
6222
        ins_l = ins_l[pnode]
6223
        msg = ins_l.fail_msg
6224
        if msg:
6225
          raise errors.OpPrereqError("Can't contact node %s: %s" %
6226
                                     (pnode, msg))
6227
        if instance.name in ins_l.payload:
6228
          raise errors.OpPrereqError("Instance is running, can't remove"
6229
                                     " disks.")
6230

    
6231
      if (disk_op == constants.DDM_ADD and
6232
          len(instance.disks) >= constants.MAX_DISKS):
6233
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
6234
                                   " add more" % constants.MAX_DISKS)
6235
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
6236
        # an existing disk
6237
        if disk_op < 0 or disk_op >= len(instance.disks):
6238
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
6239
                                     " are 0 to %d" %
6240
                                     (disk_op, len(instance.disks)))
6241

    
6242
    return
6243

    
6244
  def Exec(self, feedback_fn):
6245
    """Modifies an instance.
6246

6247
    All parameters take effect only at the next restart of the instance.
6248

6249
    """
6250
    # Process here the warnings from CheckPrereq, as we don't have a
6251
    # feedback_fn there.
6252
    for warn in self.warn:
6253
      feedback_fn("WARNING: %s" % warn)
6254

    
6255
    result = []
6256
    instance = self.instance
6257
    cluster = self.cluster
6258
    # disk changes
6259
    for disk_op, disk_dict in self.op.disks:
6260
      if disk_op == constants.DDM_REMOVE:
6261
        # remove the last disk
6262
        device = instance.disks.pop()
6263
        device_idx = len(instance.disks)
6264
        for node, disk in device.ComputeNodeTree(instance.primary_node):
6265
          self.cfg.SetDiskID(disk, node)
6266
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
6267
          if msg:
6268
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
6269
                            " continuing anyway", device_idx, node, msg)
6270
        result.append(("disk/%d" % device_idx, "remove"))
6271
      elif disk_op == constants.DDM_ADD:
6272
        # add a new disk
6273
        if instance.disk_template == constants.DT_FILE:
6274
          file_driver, file_path = instance.disks[0].logical_id
6275
          file_path = os.path.dirname(file_path)
6276
        else:
6277
          file_driver = file_path = None
6278
        disk_idx_base = len(instance.disks)
6279
        new_disk = _GenerateDiskTemplate(self,
6280
                                         instance.disk_template,
6281
                                         instance.name, instance.primary_node,
6282
                                         instance.secondary_nodes,
6283
                                         [disk_dict],
6284
                                         file_path,
6285
                                         file_driver,
6286
                                         disk_idx_base)[0]
6287
        instance.disks.append(new_disk)
6288
        info = _GetInstanceInfoText(instance)
6289

    
6290
        logging.info("Creating volume %s for instance %s",
6291
                     new_disk.iv_name, instance.name)
6292
        # Note: this needs to be kept in sync with _CreateDisks
6293
        #HARDCODE
6294
        for node in instance.all_nodes:
6295
          f_create = node == instance.primary_node
6296
          try:
6297
            _CreateBlockDev(self, node, instance, new_disk,
6298
                            f_create, info, f_create)
6299
          except errors.OpExecError, err:
6300
            self.LogWarning("Failed to create volume %s (%s) on"
6301
                            " node %s: %s",
6302
                            new_disk.iv_name, new_disk, node, err)
6303
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
6304
                       (new_disk.size, new_disk.mode)))
6305
      else:
6306
        # change a given disk
6307
        instance.disks[disk_op].mode = disk_dict['mode']
6308
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
6309
    # NIC changes
6310
    for nic_op, nic_dict in self.op.nics:
6311
      if nic_op == constants.DDM_REMOVE:
6312
        # remove the last nic
6313
        del instance.nics[-1]
6314
        result.append(("nic.%d" % len(instance.nics), "remove"))
6315
      elif nic_op == constants.DDM_ADD:
6316
        # mac and bridge should be set, by now
6317
        mac = nic_dict['mac']
6318
        ip = nic_dict.get('ip', None)
6319
        nicparams = self.nic_pinst[constants.DDM_ADD]
6320
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
6321
        instance.nics.append(new_nic)
6322
        result.append(("nic.%d" % (len(instance.nics) - 1),
6323
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
6324
                       (new_nic.mac, new_nic.ip,
6325
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
6326
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
6327
                       )))
6328
      else:
6329
        for key in 'mac', 'ip':
6330
          if key in nic_dict:
6331
            setattr(instance.nics[nic_op], key, nic_dict[key])
6332
        if nic_op in self.nic_pnew:
6333
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
6334
        for key, val in nic_dict.iteritems():
6335
          result.append(("nic.%s/%d" % (key, nic_op), val))
6336

    
6337
    # hvparams changes
6338
    if self.op.hvparams:
6339
      instance.hvparams = self.hv_inst
6340
      for key, val in self.op.hvparams.iteritems():
6341
        result.append(("hv/%s" % key, val))
6342

    
6343
    # beparams changes
6344
    if self.op.beparams:
6345
      instance.beparams = self.be_inst
6346
      for key, val in self.op.beparams.iteritems():
6347
        result.append(("be/%s" % key, val))
6348

    
6349
    self.cfg.Update(instance)
6350

    
6351
    return result
6352
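  # Illustrative sketch, not part of the original code: a possible result list
  # for a call that added a NIC and raised the memory limit (values made up):
  #
  #   [("nic.1",
  #     "add:mac=aa:00:00:11:22:33,ip=None,mode=bridged,link=xen-br0"),
  #    ("be/memory", 2048)]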

    
6353

    
6354
class LUQueryExports(NoHooksLU):
6355
  """Query the exports list
6356

6357
  """
6358
  _OP_REQP = ['nodes']
6359
  REQ_BGL = False
6360

    
6361
  def ExpandNames(self):
6362
    self.needed_locks = {}
6363
    self.share_locks[locking.LEVEL_NODE] = 1
6364
    if not self.op.nodes:
6365
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6366
    else:
6367
      self.needed_locks[locking.LEVEL_NODE] = \
6368
        _GetWantedNodes(self, self.op.nodes)
6369

    
6370
  def CheckPrereq(self):
6371
    """Check prerequisites.
6372

6373
    """
6374
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
6375

    
6376
  def Exec(self, feedback_fn):
6377
    """Compute the list of all the exported system images.
6378

6379
    @rtype: dict
6380
    @return: a dictionary with the structure node->(export-list)
6381
        where export-list is a list of the instances exported on
6382
        that node.
6383

6384
    """
6385
    rpcresult = self.rpc.call_export_list(self.nodes)
6386
    result = {}
6387
    for node in rpcresult:
6388
      if rpcresult[node].fail_msg:
6389
        result[node] = False
6390
      else:
6391
        result[node] = rpcresult[node].payload
6392

    
6393
    return result
6394
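  # Illustrative sketch, not part of the original code: a possible return
  # value, with made-up names; False marks a node whose export list could not
  # be fetched:
  #
  #   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
  #    "node2.example.com": False}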

    
6395

    
6396
class LUExportInstance(LogicalUnit):
6397
  """Export an instance to an image in the cluster.
6398

6399
  """
6400
  HPATH = "instance-export"
6401
  HTYPE = constants.HTYPE_INSTANCE
6402
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
6403
  REQ_BGL = False
6404

    
6405
  def ExpandNames(self):
6406
    self._ExpandAndLockInstance()
6407
    # FIXME: lock only instance primary and destination node
6408
    #
6409
    # Sad but true, for now we have to lock all nodes, as we don't know where
6410
    # the previous export might be, and in this LU we search for it and
6411
    # remove it from its current node. In the future we could fix this by:
6412
    #  - making a tasklet to search (share-lock all), then create the new one,
6413
    #    then one to remove, after
6414
    #  - removing the removal operation altogether
6415
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6416

    
6417
  def DeclareLocks(self, level):
6418
    """Last minute lock declaration."""
6419
    # All nodes are locked anyway, so nothing to do here.
6420

    
6421
  def BuildHooksEnv(self):
6422
    """Build hooks env.
6423

6424
    This will run on the master, primary node and target node.
6425

6426
    """
6427
    env = {
6428
      "EXPORT_NODE": self.op.target_node,
6429
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
6430
      }
6431
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6432
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
6433
          self.op.target_node]
6434
    return env, nl, nl
6435

    
6436
  def CheckPrereq(self):
6437
    """Check prerequisites.
6438

6439
    This checks that the instance and node names are valid.
6440

6441
    """
6442
    instance_name = self.op.instance_name
6443
    self.instance = self.cfg.GetInstanceInfo(instance_name)
6444
    assert self.instance is not None, \
6445
          "Cannot retrieve locked instance %s" % self.op.instance_name
6446
    _CheckNodeOnline(self, self.instance.primary_node)
6447

    
6448
    self.dst_node = self.cfg.GetNodeInfo(
6449
      self.cfg.ExpandNodeName(self.op.target_node))
6450

    
6451
    if self.dst_node is None:
6452
      # This means a wrong node name, rather than a node we failed to lock
6453
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
6454
    _CheckNodeOnline(self, self.dst_node.name)
6455
    _CheckNodeNotDrained(self, self.dst_node.name)
6456

    
6457
    # instance disk type verification
6458
    for disk in self.instance.disks:
6459
      if disk.dev_type == constants.LD_FILE:
6460
        raise errors.OpPrereqError("Export not supported for instances with"
6461
                                   " file-based disks")
6462

    
6463
  def Exec(self, feedback_fn):
6464
    """Export an instance to an image in the cluster.
6465

6466
    """
6467
    instance = self.instance
6468
    dst_node = self.dst_node
6469
    src_node = instance.primary_node
6470
    if self.op.shutdown:
6471
      # shutdown the instance, but not the disks
6472
      result = self.rpc.call_instance_shutdown(src_node, instance)
6473
      result.Raise("Could not shutdown instance %s on"
6474
                   " node %s" % (instance.name, src_node))
6475

    
6476
    vgname = self.cfg.GetVGName()
6477

    
6478
    snap_disks = []
6479

    
6480
    # set the disk IDs correctly since call_instance_start needs the
6481
    # correct drbd minor to create the symlinks
6482
    for disk in instance.disks:
6483
      self.cfg.SetDiskID(disk, src_node)
6484

    
6485
    try:
6486
      for idx, disk in enumerate(instance.disks):
6487
        # result.payload will be a snapshot of an LVM leaf of the disk we passed
6488
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
6489
        msg = result.fail_msg
6490
        if msg:
6491
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
6492
                          idx, src_node, msg)
6493
          snap_disks.append(False)
6494
        else:
6495
          disk_id = (vgname, result.payload)
6496
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
6497
                                 logical_id=disk_id, physical_id=disk_id,
6498
                                 iv_name=disk.iv_name)
6499
          snap_disks.append(new_dev)
6500

    
6501
    finally:
6502
      if self.op.shutdown and instance.admin_up:
6503
        result = self.rpc.call_instance_start(src_node, instance, None, None)
6504
        msg = result.fail_msg
6505
        if msg:
6506
          _ShutdownInstanceDisks(self, instance)
6507
          raise errors.OpExecError("Could not start instance: %s" % msg)
6508

    
6509
    # TODO: check for size
6510

    
6511
    cluster_name = self.cfg.GetClusterName()
6512
    for idx, dev in enumerate(snap_disks):
6513
      if dev:
6514
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
6515
                                               instance, cluster_name, idx)
6516
        msg = result.fail_msg
6517
        if msg:
6518
          self.LogWarning("Could not export disk/%s from node %s to"
6519
                          " node %s: %s", idx, src_node, dst_node.name, msg)
6520
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
6521
        if msg:
6522
          self.LogWarning("Could not remove snapshot for disk/%d from node"
6523
                          " %s: %s", idx, src_node, msg)
6524

    
6525
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
6526
    msg = result.fail_msg
6527
    if msg:
6528
      self.LogWarning("Could not finalize export for instance %s"
6529
                      " on node %s: %s", instance.name, dst_node.name, msg)
6530

    
6531
    nodelist = self.cfg.GetNodeList()
6532
    nodelist.remove(dst_node.name)
6533

    
6534
    # on one-node clusters nodelist will be empty after the removal
6535
    # if we proceed the backup would be removed because OpQueryExports
6536
    # substitutes an empty list with the full cluster node list.
6537
    iname = instance.name
6538
    if nodelist:
6539
      exportlist = self.rpc.call_export_list(nodelist)
6540
      for node in exportlist:
6541
        if exportlist[node].fail_msg:
6542
          continue
6543
        if iname in exportlist[node].payload:
6544
          msg = self.rpc.call_export_remove(node, iname).fail_msg
6545
          if msg:
6546
            self.LogWarning("Could not remove older export for instance %s"
6547
                            " on node %s: %s", iname, node, msg)
6548

    
6549

    
6550
class LURemoveExport(NoHooksLU):
6551
  """Remove exports related to the named instance.
6552

6553
  """
6554
  _OP_REQP = ["instance_name"]
6555
  REQ_BGL = False
6556

    
6557
  def ExpandNames(self):
6558
    self.needed_locks = {}
6559
    # We need all nodes to be locked in order for RemoveExport to work, but we
6560
    # don't need to lock the instance itself, as nothing will happen to it (and
6561
    # we can also remove exports for a removed instance)
6562
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6563

    
6564
  def CheckPrereq(self):
6565
    """Check prerequisites.
6566
    """
6567
    pass
6568

    
6569
  def Exec(self, feedback_fn):
6570
    """Remove any export.
6571

6572
    """
6573
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
6574
    # If the instance was not found we'll try with the name that was passed in.
6575
    # This will only work if it was an FQDN, though.
6576
    fqdn_warn = False
6577
    if not instance_name:
6578
      fqdn_warn = True
6579
      instance_name = self.op.instance_name
6580

    
6581
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6582
    exportlist = self.rpc.call_export_list(locked_nodes)
6583
    found = False
6584
    for node in exportlist:
6585
      msg = exportlist[node].fail_msg
6586
      if msg:
6587
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
6588
        continue
6589
      if instance_name in exportlist[node].payload:
6590
        found = True
6591
        result = self.rpc.call_export_remove(node, instance_name)
6592
        msg = result.fail_msg
6593
        if msg:
6594
          logging.error("Could not remove export for instance %s"
6595
                        " on node %s: %s", instance_name, node, msg)
6596

    
6597
    if fqdn_warn and not found:
6598
      feedback_fn("Export not found. If trying to remove an export belonging"
6599
                  " to a deleted instance please use its Fully Qualified"
6600
                  " Domain Name.")
6601

    
6602

    
6603
class TagsLU(NoHooksLU):
6604
  """Generic tags LU.
6605

6606
  This is an abstract class which is the parent of all the other tags LUs.
6607

6608
  """
6609

    
6610
  def ExpandNames(self):
6611
    self.needed_locks = {}
6612
    if self.op.kind == constants.TAG_NODE:
6613
      name = self.cfg.ExpandNodeName(self.op.name)
6614
      if name is None:
6615
        raise errors.OpPrereqError("Invalid node name (%s)" %
6616
                                   (self.op.name,))
6617
      self.op.name = name
6618
      self.needed_locks[locking.LEVEL_NODE] = name
6619
    elif self.op.kind == constants.TAG_INSTANCE:
6620
      name = self.cfg.ExpandInstanceName(self.op.name)
6621
      if name is None:
6622
        raise errors.OpPrereqError("Invalid instance name (%s)" %
6623
                                   (self.op.name,))
6624
      self.op.name = name
6625
      self.needed_locks[locking.LEVEL_INSTANCE] = name
6626

    
6627
  def CheckPrereq(self):
6628
    """Check prerequisites.
6629

6630
    """
6631
    if self.op.kind == constants.TAG_CLUSTER:
6632
      self.target = self.cfg.GetClusterInfo()
6633
    elif self.op.kind == constants.TAG_NODE:
6634
      self.target = self.cfg.GetNodeInfo(self.op.name)
6635
    elif self.op.kind == constants.TAG_INSTANCE:
6636
      self.target = self.cfg.GetInstanceInfo(self.op.name)
6637
    else:
6638
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
6639
                                 str(self.op.kind))
6640

    
6641

    
6642
class LUGetTags(TagsLU):
6643
  """Returns the tags of a given object.
6644

6645
  """
6646
  _OP_REQP = ["kind", "name"]
6647
  REQ_BGL = False
6648

    
6649
  def Exec(self, feedback_fn):
6650
    """Returns the tag list.
6651

6652
    """
6653
    return list(self.target.GetTags())
6654

    
6655

    
6656
class LUSearchTags(NoHooksLU):
6657
  """Searches the tags for a given pattern.
6658

6659
  """
6660
  _OP_REQP = ["pattern"]
6661
  REQ_BGL = False
6662

    
6663
  def ExpandNames(self):
6664
    self.needed_locks = {}
6665

    
6666
  def CheckPrereq(self):
6667
    """Check prerequisites.
6668

6669
    This checks the pattern passed for validity by compiling it.
6670

6671
    """
6672
    try:
6673
      self.re = re.compile(self.op.pattern)
6674
    except re.error, err:
6675
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
6676
                                 (self.op.pattern, err))
6677

    
6678
  def Exec(self, feedback_fn):
6679
    """Returns the tag list.
6680

6681
    """
6682
    cfg = self.cfg
6683
    tgts = [("/cluster", cfg.GetClusterInfo())]
6684
    ilist = cfg.GetAllInstancesInfo().values()
6685
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
6686
    nlist = cfg.GetAllNodesInfo().values()
6687
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
6688
    results = []
6689
    for path, target in tgts:
6690
      for tag in target.GetTags():
6691
        if self.re.search(tag):
6692
          results.append((path, tag))
6693
    return results
6694
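  # Illustrative sketch, not part of the original code: searching for the
  # pattern "^web" could return something like (names made up):
  #
  #   [("/cluster", "webfarm"),
  #    ("/instances/inst1.example.com", "webserver")]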

    
6695

    
6696
class LUAddTags(TagsLU):
6697
  """Sets a tag on a given object.
6698

6699
  """
6700
  _OP_REQP = ["kind", "name", "tags"]
6701
  REQ_BGL = False
6702

    
6703
  def CheckPrereq(self):
6704
    """Check prerequisites.
6705

6706
    This checks the type and length of the tag name and value.
6707

6708
    """
6709
    TagsLU.CheckPrereq(self)
6710
    for tag in self.op.tags:
6711
      objects.TaggableObject.ValidateTag(tag)
6712

    
6713
  def Exec(self, feedback_fn):
6714
    """Sets the tag.
6715

6716
    """
6717
    try:
6718
      for tag in self.op.tags:
6719
        self.target.AddTag(tag)
6720
    except errors.TagError, err:
6721
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
6722
    try:
6723
      self.cfg.Update(self.target)
6724
    except errors.ConfigurationError:
6725
      raise errors.OpRetryError("There has been a modification to the"
6726
                                " config file and the operation has been"
6727
                                " aborted. Please retry.")
6728

    
6729

    
6730
class LUDelTags(TagsLU):
6731
  """Delete a list of tags from a given object.
6732

6733
  """
6734
  _OP_REQP = ["kind", "name", "tags"]
6735
  REQ_BGL = False
6736

    
6737
  def CheckPrereq(self):
6738
    """Check prerequisites.
6739

6740
    This checks that we have the given tag.
6741

6742
    """
6743
    TagsLU.CheckPrereq(self)
6744
    for tag in self.op.tags:
6745
      objects.TaggableObject.ValidateTag(tag)
6746
    del_tags = frozenset(self.op.tags)
6747
    cur_tags = self.target.GetTags()
6748
    if not del_tags <= cur_tags:
6749
      diff_tags = del_tags - cur_tags
6750
      diff_names = ["'%s'" % tag for tag in diff_tags]
6751
      diff_names.sort()
6752
      raise errors.OpPrereqError("Tag(s) %s not found" %
6753
                                 (",".join(diff_names)))
6754

    
6755
  def Exec(self, feedback_fn):
6756
    """Remove the tag from the object.
6757

6758
    """
6759
    for tag in self.op.tags:
6760
      self.target.RemoveTag(tag)
6761
    try:
6762
      self.cfg.Update(self.target)
6763
    except errors.ConfigurationError:
6764
      raise errors.OpRetryError("There has been a modification to the"
6765
                                " config file and the operation has been"
6766
                                " aborted. Please retry.")
6767

    
6768

    
6769
class LUTestDelay(NoHooksLU):
6770
  """Sleep for a specified amount of time.
6771

6772
  This LU sleeps on the master and/or nodes for a specified amount of
6773
  time.
6774

6775
  """
6776
  _OP_REQP = ["duration", "on_master", "on_nodes"]
6777
  REQ_BGL = False
6778

    
6779
  def ExpandNames(self):
6780
    """Expand names and set required locks.
6781

6782
    This expands the node list, if any.
6783

6784
    """
6785
    self.needed_locks = {}
6786
    if self.op.on_nodes:
6787
      # _GetWantedNodes can be used here, but is not always appropriate to use
6788
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
6789
      # more information.
6790
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
6791
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
6792

    
6793
  def CheckPrereq(self):
6794
    """Check prerequisites.
6795

6796
    """
6797

    
6798
  def Exec(self, feedback_fn):
6799
    """Do the actual sleep.
6800

6801
    """
6802
    if self.op.on_master:
6803
      if not utils.TestDelay(self.op.duration):
6804
        raise errors.OpExecError("Error during master delay test")
6805
    if self.op.on_nodes:
6806
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
6807
      for node, node_result in result.items():
6808
        node_result.Raise("Failure during rpc call to node %s" % node)
6809

    
6810

    
6811
class IAllocator(object):
6812
  """IAllocator framework.
6813

6814
  An IAllocator instance has four sets of attributes:
6815
    - cfg that is needed to query the cluster
6816
    - input data (all members of the mode-specific _ALLO_KEYS or
      _RELO_KEYS class attribute are required)
6817
    - four buffer attributes (in|out_data|text), that represent the
6818
      input (to the external script) in text and data structure format,
6819
      and the output from it, again in two formats
6820
    - the result variables from the script (success, info, nodes) for
6821
      easy usage
6822

6823
  """
6824
  _ALLO_KEYS = [
6825
    "mem_size", "disks", "disk_template",
6826
    "os", "tags", "nics", "vcpus", "hypervisor",
6827
    ]
6828
  _RELO_KEYS = [
6829
    "relocate_from",
6830
    ]
6831

    
6832
  def __init__(self, lu, mode, name, **kwargs):
6833
    self.lu = lu
6834
    # init buffer variables
6835
    self.in_text = self.out_text = self.in_data = self.out_data = None
6836
    # init all input fields so that pylint is happy
6837
    self.mode = mode
6838
    self.name = name
6839
    self.mem_size = self.disks = self.disk_template = None
6840
    self.os = self.tags = self.nics = self.vcpus = None
6841
    self.hypervisor = None
6842
    self.relocate_from = None
6843
    # computed fields
6844
    self.required_nodes = None
6845
    # init result fields
6846
    self.success = self.info = self.nodes = None
6847
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
6848
      keyset = self._ALLO_KEYS
6849
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
6850
      keyset = self._RELO_KEYS
6851
    else:
6852
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
6853
                                   " IAllocator" % self.mode)
6854
    for key in kwargs:
6855
      if key not in keyset:
6856
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
6857
                                     " IAllocator" % key)
6858
      setattr(self, key, kwargs[key])
6859
    for key in keyset:
6860
      if key not in kwargs:
6861
        raise errors.ProgrammerError("Missing input parameter '%s' to"
6862
                                     " IAllocator" % key)
6863
    self._BuildInputData()
6864

    
6865
  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.lu.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.lu.rpc.call_node_info(node_list, cfg.GetVGName(),
                                           hypervisor_name)
    node_iinfo = self.lu.rpc.call_all_instances_info(node_list,
                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not ninfo.offline:
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.lu.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

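  # Typical driver code in an LU looks roughly like the sketch below; the
  # opcode field holding the allocator name and the exact constructor
  # arguments depend on the calling LU:
  #   ial = IAllocator(self, mode=constants.IALLOCATOR_MODE_RELOC,
  #                    name=instance.name, relocate_from=[old_secondary])
  #   ial.Run(self.op.iallocator)
  #   if not ial.success:
  #     raise errors.OpPrereqError("Can't compute nodes using iallocator:"
  #                                " %s" % ial.info)
  #   new_nodes = ial.nodes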
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.lu.rpc.call_iallocator_runner

    result = call_fn(self.lu.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

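  # The allocator script must answer with a JSON object carrying at least
  # the "success", "info" and "nodes" keys; an illustrative, made-up reply:
  #   {"success": true,
  #    "info": "allocation successful",
  #    "nodes": ["node3.example.com", "node4.example.com"]}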
  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and in the success, info and nodes attributes.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_REQP = ["direction", "mode", "name"]

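  # For the allocation mode the opcode has to describe the prospective
  # instance; a purely illustrative set of values (not a prescription):
  #   name="inst1.example.com", mem_size=512, vcpus=1, os="debian-etch",
  #   disk_template=constants.DT_DRBD8, tags=[],
  #   disks=[{"size": 1024, "mode": "w"}],
  #   nics=[{"mac": "aa:00:00:11:22:33", "ip": None, "bridge": "xen-br0"}]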
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result