lib/cmdlib.py @ 34d657ba


1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
46
  """Logical Unit base class.
47

48
  Subclasses must follow these rules:
49
    - implement ExpandNames
50
    - implement CheckPrereq (except when tasklets are used)
51
    - implement Exec (except when tasklets are used)
52
    - implement BuildHooksEnv
53
    - redefine HPATH and HTYPE
54
    - optionally redefine their run requirements:
55
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
56

57
  Note that all commands require root permissions.
58

59
  @ivar dry_run_result: the value (if any) that will be returned to the caller
60
      in dry-run mode (signalled by opcode dry_run parameter)
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92
    self.LogStep = processor.LogStep
93
    # support for dry-run
94
    self.dry_run_result = None
95

    
96
    # Tasklets
97
    self.tasklets = None
98

    
99
    for attr_name in self._OP_REQP:
100
      attr_val = getattr(op, attr_name, None)
101
      if attr_val is None:
102
        raise errors.OpPrereqError("Required parameter '%s' missing" %
103
                                   attr_name)
104

    
105
    self.CheckArguments()
106

    
107
  def __GetSSH(self):
108
    """Returns the SshRunner object
109

110
    """
111
    if not self.__ssh:
112
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
113
    return self.__ssh
114

    
115
  ssh = property(fget=__GetSSH)
116

    
117
  def CheckArguments(self):
118
    """Check syntactic validity for the opcode arguments.
119

120
    This method is for doing a simple syntactic check, ensuring the
121
    validity of opcode parameters, without any cluster-related
122
    checks. While the same can be accomplished in ExpandNames and/or
123
    CheckPrereq, doing these checks separately is better because:
124

125
      - ExpandNames is left as a purely lock-related function
126
      - CheckPrereq is run after we have acquired locks (and possibly
127
        waited for them)
128

129
    The function is allowed to change the self.op attribute so that
130
    later methods no longer have to worry about missing parameters.
131

132
    """
133
    pass
134

    
135
  def ExpandNames(self):
136
    """Expand names for this LU.
137

138
    This method is called before starting to execute the opcode, and it should
139
    update all the parameters of the opcode to their canonical form (e.g. a
140
    short node name must be fully expanded after this method has successfully
141
    completed). This way locking, hooks, logging, etc. can work correctly.
142

143
    LUs which implement this method must also populate the self.needed_locks
144
    member, as a dict with lock levels as keys, and a list of needed lock names
145
    as values. Rules:
146

147
      - use an empty dict if you don't need any lock
148
      - if you don't need any lock at a particular level omit that level
149
      - don't put anything for the BGL level
150
      - if you want all locks at a level use locking.ALL_SET as a value
151

152
    If you need to share locks (rather than acquire them exclusively) at one
153
    level you can modify self.share_locks, setting a true value (usually 1) for
154
    that level. By default locks are not shared.
155

156
    This function can also define a list of tasklets, which then will be
157
    executed in order instead of the usual LU-level CheckPrereq and Exec
158
    functions, if those are not defined by the LU.
159

160
    Examples::
161

162
      # Acquire all nodes and one instance
163
      self.needed_locks = {
164
        locking.LEVEL_NODE: locking.ALL_SET,
165
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
166
      }
167
      # Acquire just two nodes
168
      self.needed_locks = {
169
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
170
      }
171
      # Acquire no locks
172
      self.needed_locks = {} # No, you can't leave it to the default value None
173

174
    """
175
    # The implementation of this method is mandatory only if the new LU is
176
    # concurrent, so that old LUs don't need to be changed all at the same
177
    # time.
178
    if self.REQ_BGL:
179
      self.needed_locks = {} # Exclusive LUs don't need locks.
180
    else:
181
      raise NotImplementedError
182

    
183
  def DeclareLocks(self, level):
184
    """Declare LU locking needs for a level
185

186
    While most LUs can just declare their locking needs at ExpandNames time,
187
    sometimes there's the need to calculate some locks after having acquired
188
    the ones before. This function is called just before acquiring locks at a
189
    particular level, but after acquiring the ones at lower levels, and permits
190
    such calculations. It can be used to modify self.needed_locks, and by
191
    default it does nothing.
192

193
    This function is only called if you have something already set in
194
    self.needed_locks for the level.
195

196
    @param level: Locking level which is going to be locked
197
    @type level: member of ganeti.locking.LEVELS
198

199
    """
200

    
201
  def CheckPrereq(self):
202
    """Check prerequisites for this LU.
203

204
    This method should check that the prerequisites for the execution
205
    of this LU are fulfilled. It can do internode communication, but
206
    it should be idempotent - no cluster or system changes are
207
    allowed.
208

209
    The method should raise errors.OpPrereqError in case something is
210
    not fulfilled. Its return value is ignored.
211

212
    This method should also update all the parameters of the opcode to
213
    their canonical form if it hasn't been done by ExpandNames before.
214

215
    """
216
    if self.tasklets is not None:
217
      for (idx, tl) in enumerate(self.tasklets):
218
        logging.debug("Checking prerequisites for tasklet %s/%s",
219
                      idx + 1, len(self.tasklets))
220
        tl.CheckPrereq()
221
    else:
222
      raise NotImplementedError
223

    
224
  def Exec(self, feedback_fn):
225
    """Execute the LU.
226

227
    This method should implement the actual work. It should raise
228
    errors.OpExecError for failures that are somewhat dealt with in
229
    code, or expected.
230

231
    """
232
    if self.tasklets is not None:
233
      for (idx, tl) in enumerate(self.tasklets):
234
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
235
        tl.Exec(feedback_fn)
236
    else:
237
      raise NotImplementedError
238

    
239
  def BuildHooksEnv(self):
240
    """Build hooks environment for this LU.
241

242
    This method should return a three-element tuple consisting of: a dict
243
    containing the environment that will be used for running the
244
    specific hook for this LU, a list of node names on which the hook
245
    should run before the execution, and a list of node names on which
246
    the hook should run after the execution.
247

248
    The keys of the dict must not be prefixed with 'GANETI_', as this will
249
    be handled in the hooks runner. Also note additional keys will be
250
    added by the hooks runner. If the LU doesn't define any
251
    environment, an empty dict (and not None) should be returned.
252

253
    If there are no nodes to return, use an empty list (and not None).
254

255
    Note that if the HPATH for a LU class is None, this function will
256
    not be called.
257

258
    """
259
    raise NotImplementedError
260

    
261
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
262
    """Notify the LU about the results of its hooks.
263

264
    This method is called every time a hooks phase is executed, and notifies
265
    the Logical Unit about the hooks' result. The LU can then use it to alter
266
    its result based on the hooks.  By default the method does nothing and the
267
    previous result is passed back unchanged but any LU can define it if it
268
    wants to use the local cluster hook-scripts somehow.
269

270
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
271
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
272
    @param hook_results: the results of the multi-node hooks rpc call
273
    @param feedback_fn: function used to send feedback back to the caller
274
    @param lu_result: the previous Exec result this LU had, or None
275
        in the PRE phase
276
    @return: the new Exec result, based on the previous result
277
        and hook results
278

279
    """
280
    return lu_result
281

    
282
  def _ExpandAndLockInstance(self):
283
    """Helper function to expand and lock an instance.
284

285
    Many LUs that work on an instance take its name in self.op.instance_name
286
    and need to expand it and then declare the expanded name for locking. This
287
    function does it, and then updates self.op.instance_name to the expanded
288
    name. It also initializes needed_locks as a dict, if this hasn't been done
289
    before.
290

291
    """
292
    if self.needed_locks is None:
293
      self.needed_locks = {}
294
    else:
295
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
296
        "_ExpandAndLockInstance called with instance-level locks set"
297
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
298
    if expanded_name is None:
299
      raise errors.OpPrereqError("Instance '%s' not known" %
300
                                  self.op.instance_name)
301
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
302
    self.op.instance_name = expanded_name
303

    
304
  def _LockInstancesNodes(self, primary_only=False):
305
    """Helper function to declare instances' nodes for locking.
306

307
    This function should be called after locking one or more instances to lock
308
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
309
    with all primary or secondary nodes for instances already locked and
310
    present in self.needed_locks[locking.LEVEL_INSTANCE].
311

312
    It should be called from DeclareLocks, and for safety only works if
313
    self.recalculate_locks[locking.LEVEL_NODE] is set.
314

315
    In the future it may grow parameters to just lock some instances' nodes, or
316
    to just lock primaries or secondary nodes, if needed.
317

318
    It should be called in DeclareLocks in a way similar to::
319

320
      if level == locking.LEVEL_NODE:
321
        self._LockInstancesNodes()
322

323
    @type primary_only: boolean
324
    @param primary_only: only lock primary nodes of locked instances
325

326
    """
327
    assert locking.LEVEL_NODE in self.recalculate_locks, \
328
      "_LockInstancesNodes helper function called with no nodes to recalculate"
329

    
330
    # TODO: check if we've really been called with the instance locks held
331

    
332
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
333
    # future we might want to have different behaviors depending on the value
334
    # of self.recalculate_locks[locking.LEVEL_NODE]
335
    wanted_nodes = []
336
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
337
      instance = self.context.cfg.GetInstanceInfo(instance_name)
338
      wanted_nodes.append(instance.primary_node)
339
      if not primary_only:
340
        wanted_nodes.extend(instance.secondary_nodes)
341

    
342
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
343
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
344
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
345
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
346

    
347
    del self.recalculate_locks[locking.LEVEL_NODE]
348

    
349
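# Illustrative sketch, not part of the original cmdlib.py: a hypothetical,
# minimal LogicalUnit following the contract documented above.  ExpandNames
# declares locks, DeclareLocks recalculates node locks via _LockInstancesNodes,
# CheckPrereq validates cluster state, BuildHooksEnv returns the
# (env, pre-nodes, post-nodes) tuple and Exec does the actual work.  The class
# name and the "instance_name" opcode field are only examples.
class _ExampleInstanceNoopLU(LogicalUnit):
  HPATH = "example-noop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    # lock the (expanded) instance, and ask for its nodes to be computed
    # once the instance lock is held
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    if self.instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, self.instance.primary_node)

  def BuildHooksEnv(self):
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
    return env, nl, nl

  def Exec(self, feedback_fn):
    feedback_fn("Would operate on instance %s here" % self.instance.name)
    return self.instance.name
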

    
350
class NoHooksLU(LogicalUnit):
351
  """Simple LU which runs no hooks.
352

353
  This LU is intended as a parent for other LogicalUnits which will
354
  run no hooks, in order to reduce duplicate code.
355

356
  """
357
  HPATH = None
358
  HTYPE = None
359

    
360

    
361
class Tasklet:
362
  """Tasklet base class.
363

364
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
365
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
366
  tasklets know nothing about locks.
367

368
  Subclasses must follow these rules:
369
    - Implement CheckPrereq
370
    - Implement Exec
371

372
  """
373
  def __init__(self, lu):
374
    self.lu = lu
375

    
376
    # Shortcuts
377
    self.cfg = lu.cfg
378
    self.rpc = lu.rpc
379

    
380
  def CheckPrereq(self):
381
    """Check prerequisites for this tasklets.
382

383
    This method should check whether the prerequisites for the execution of
384
    this tasklet are fulfilled. It can do internode communication, but it
385
    should be idempotent - no cluster or system changes are allowed.
386

387
    The method should raise errors.OpPrereqError in case something is not
388
    fulfilled. Its return value is ignored.
389

390
    This method should also update all parameters to their canonical form if it
391
    hasn't been done before.
392

393
    """
394
    raise NotImplementedError
395

    
396
  def Exec(self, feedback_fn):
397
    """Execute the tasklet.
398

399
    This method should implement the actual work. It should raise
400
    errors.OpExecError for failures that are somewhat dealt with in code, or
401
    expected.
402

403
    """
404
    raise NotImplementedError
405

    
406
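# Illustrative sketch, not part of the original cmdlib.py: a hypothetical
# tasklet and how an LU would hand work over to it.  Once an LU sets
# self.tasklets, the base LogicalUnit.CheckPrereq and Exec above simply walk
# the list, so the LU itself only has to take care of locking.  All names
# below are made up.
class _ExamplePingTasklet(Tasklet):
  def __init__(self, lu, node_name):
    Tasklet.__init__(self, lu)
    self.node_name = node_name

  def CheckPrereq(self):
    if self.cfg.GetNodeInfo(self.node_name) is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Would contact node %s here" % self.node_name)


# A hypothetical LU would then do, in its ExpandNames:
#   self.tasklets = [_ExamplePingTasklet(self, name) for name in self.op.nodes]
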

    
407
def _GetWantedNodes(lu, nodes):
408
  """Returns list of checked and expanded node names.
409

410
  @type lu: L{LogicalUnit}
411
  @param lu: the logical unit on whose behalf we execute
412
  @type nodes: list
413
  @param nodes: non-empty list of node names to expand
414
  @rtype: list
415
  @return: the list of nodes, sorted
416
  @raise errors.OpPrereqError: if the nodes parameter is wrong type
417

418
  """
419
  if not isinstance(nodes, list):
420
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
421

    
422
  if not nodes:
423
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
424
      " non-empty list of nodes whose name is to be expanded.")
425

    
426
  wanted = []
427
  for name in nodes:
428
    node = lu.cfg.ExpandNodeName(name)
429
    if node is None:
430
      raise errors.OpPrereqError("No such node name '%s'" % name)
431
    wanted.append(node)
432

    
433
  return utils.NiceSort(wanted)
434

    
435

    
436
def _GetWantedInstances(lu, instances):
437
  """Returns list of checked and expanded instance names.
438

439
  @type lu: L{LogicalUnit}
440
  @param lu: the logical unit on whose behalf we execute
441
  @type instances: list
442
  @param instances: list of instance names, or an empty list for all instances
443
  @rtype: list
444
  @return: the list of instances, sorted
445
  @raise errors.OpPrereqError: if the instances parameter is wrong type
446
  @raise errors.OpPrereqError: if any of the passed instances is not found
447

448
  """
449
  if not isinstance(instances, list):
450
    raise errors.OpPrereqError("Invalid argument type 'instances'")
451

    
452
  if instances:
453
    wanted = []
454

    
455
    for name in instances:
456
      instance = lu.cfg.ExpandInstanceName(name)
457
      if instance is None:
458
        raise errors.OpPrereqError("No such instance name '%s'" % name)
459
      wanted.append(instance)
460

    
461
  else:
462
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
463
  return wanted
464

    
465
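# Illustrative sketch, not part of the original cmdlib.py: typical use of the
# two helpers above from an LU.  Note that _GetWantedNodes insists on a
# non-empty list, while passing an empty list to _GetWantedInstances means
# "all instances".  The short node names are made up.
def _ExampleExpandTargets(lu):
  nodes = _GetWantedNodes(lu, ["node1", "node2"])  # expanded and nice-sorted
  instances = _GetWantedInstances(lu, [])          # empty list -> all instances
  return nodes, instances
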

    
466
def _CheckOutputFields(static, dynamic, selected):
467
  """Checks whether all selected fields are valid.
468

469
  @type static: L{utils.FieldSet}
470
  @param static: static fields set
471
  @type dynamic: L{utils.FieldSet}
472
  @param dynamic: dynamic fields set
473

474
  """
475
  f = utils.FieldSet()
476
  f.Extend(static)
477
  f.Extend(dynamic)
478

    
479
  delta = f.NonMatching(selected)
480
  if delta:
481
    raise errors.OpPrereqError("Unknown output fields selected: %s"
482
                               % ",".join(delta))
483

    
484
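# Illustrative sketch, not part of the original cmdlib.py: how a query LU
# would validate its output_fields with the helper above.  This assumes
# utils.FieldSet can be constructed from field names (as the query LUs in
# this module do); the field names themselves are only examples.
def _ExampleCheckQueryFields(op):
  static_fields = utils.FieldSet("name", "pnode", "snodes")
  dynamic_fields = utils.FieldSet("oper_state", "oper_ram")
  # raises errors.OpPrereqError if op.output_fields contains unknown fields
  _CheckOutputFields(static=static_fields,
                     dynamic=dynamic_fields,
                     selected=op.output_fields)
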

    
485
def _CheckBooleanOpField(op, name):
486
  """Validates boolean opcode parameters.
487

488
  This will ensure that an opcode parameter is either a boolean value,
489
  or None (but that it always exists).
490

491
  """
492
  val = getattr(op, name, None)
493
  if not (val is None or isinstance(val, bool)):
494
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
495
                               (name, str(val)))
496
  setattr(op, name, val)
497

    
498

    
499
def _CheckNodeOnline(lu, node):
500
  """Ensure that a given node is online.
501

502
  @param lu: the LU on behalf of which we make the check
503
  @param node: the node to check
504
  @raise errors.OpPrereqError: if the node is offline
505

506
  """
507
  if lu.cfg.GetNodeInfo(node).offline:
508
    raise errors.OpPrereqError("Can't use offline node %s" % node)
509

    
510

    
511
def _CheckNodeNotDrained(lu, node):
512
  """Ensure that a given node is not drained.
513

514
  @param lu: the LU on behalf of which we make the check
515
  @param node: the node to check
516
  @raise errors.OpPrereqError: if the node is drained
517

518
  """
519
  if lu.cfg.GetNodeInfo(node).drained:
520
    raise errors.OpPrereqError("Can't use drained node %s" % node)
521

    
522

    
523
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
524
                          memory, vcpus, nics, disk_template, disks,
525
                          bep, hvp, hypervisor_name):
526
  """Builds instance related env variables for hooks
527

528
  This builds the hook environment from individual variables.
529

530
  @type name: string
531
  @param name: the name of the instance
532
  @type primary_node: string
533
  @param primary_node: the name of the instance's primary node
534
  @type secondary_nodes: list
535
  @param secondary_nodes: list of secondary nodes as strings
536
  @type os_type: string
537
  @param os_type: the name of the instance's OS
538
  @type status: boolean
539
  @param status: the should_run status of the instance
540
  @type memory: string
541
  @param memory: the memory size of the instance
542
  @type vcpus: string
543
  @param vcpus: the count of VCPUs the instance has
544
  @type nics: list
545
  @param nics: list of tuples (ip, mac, mode, link) representing
546
      the NICs the instance has
547
  @type disk_template: string
548
  @param disk_template: the disk template of the instance
549
  @type disks: list
550
  @param disks: the list of (size, mode) pairs
551
  @type bep: dict
552
  @param bep: the backend parameters for the instance
553
  @type hvp: dict
554
  @param hvp: the hypervisor parameters for the instance
555
  @type hypervisor_name: string
556
  @param hypervisor_name: the hypervisor for the instance
557
  @rtype: dict
558
  @return: the hook environment for this instance
559

560
  """
561
  if status:
562
    str_status = "up"
563
  else:
564
    str_status = "down"
565
  env = {
566
    "OP_TARGET": name,
567
    "INSTANCE_NAME": name,
568
    "INSTANCE_PRIMARY": primary_node,
569
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
570
    "INSTANCE_OS_TYPE": os_type,
571
    "INSTANCE_STATUS": str_status,
572
    "INSTANCE_MEMORY": memory,
573
    "INSTANCE_VCPUS": vcpus,
574
    "INSTANCE_DISK_TEMPLATE": disk_template,
575
    "INSTANCE_HYPERVISOR": hypervisor_name,
576
  }
577

    
578
  if nics:
579
    nic_count = len(nics)
580
    for idx, (ip, mac, mode, link) in enumerate(nics):
581
      if ip is None:
582
        ip = ""
583
      env["INSTANCE_NIC%d_IP" % idx] = ip
584
      env["INSTANCE_NIC%d_MAC" % idx] = mac
585
      env["INSTANCE_NIC%d_MODE" % idx] = mode
586
      env["INSTANCE_NIC%d_LINK" % idx] = link
587
      if mode == constants.NIC_MODE_BRIDGED:
588
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
589
  else:
590
    nic_count = 0
591

    
592
  env["INSTANCE_NIC_COUNT"] = nic_count
593

    
594
  if disks:
595
    disk_count = len(disks)
596
    for idx, (size, mode) in enumerate(disks):
597
      env["INSTANCE_DISK%d_SIZE" % idx] = size
598
      env["INSTANCE_DISK%d_MODE" % idx] = mode
599
  else:
600
    disk_count = 0
601

    
602
  env["INSTANCE_DISK_COUNT"] = disk_count
603

    
604
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
605
    for key, value in source.items():
606
      env["INSTANCE_%s_%s" % (kind, key)] = value
607

    
608
  return env
609

    
610
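# Illustrative example, not part of the original cmdlib.py: a sample call to
# the env builder above with made-up values.  The resulting dict carries keys
# such as INSTANCE_NAME, INSTANCE_PRIMARY, INSTANCE_NIC0_MAC,
# INSTANCE_DISK0_SIZE and INSTANCE_BE_*/INSTANCE_HV_* entries.
def _ExampleInstanceHookEnv():
  nics = [("198.51.100.10", "aa:00:00:11:22:33",
           constants.NIC_MODE_BRIDGED, "xen-br0")]
  disks = [(10240, "rw")]
  return _BuildInstanceHookEnv("inst1.example.com", "node1.example.com",
                               ["node2.example.com"], "debian-etch", True,
                               128, 1, nics, "drbd", disks,
                               {"memory": 128},
                               {"kernel_path": "/boot/vmlinuz"},
                               "xen-pvm")
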

    
611
def _NICListToTuple(lu, nics):
612
  """Build a list of nic information tuples.
613

614
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
615
  value in LUQueryInstanceData.
616

617
  @type lu:  L{LogicalUnit}
618
  @param lu: the logical unit on whose behalf we execute
619
  @type nics: list of L{objects.NIC}
620
  @param nics: list of nics to convert to hooks tuples
621

622
  """
623
  hooks_nics = []
624
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
625
  for nic in nics:
626
    ip = nic.ip
627
    mac = nic.mac
628
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
629
    mode = filled_params[constants.NIC_MODE]
630
    link = filled_params[constants.NIC_LINK]
631
    hooks_nics.append((ip, mac, mode, link))
632
  return hooks_nics
633

    
634

    
635
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
636
  """Builds instance related env variables for hooks from an object.
637

638
  @type lu: L{LogicalUnit}
639
  @param lu: the logical unit on whose behalf we execute
640
  @type instance: L{objects.Instance}
641
  @param instance: the instance for which we should build the
642
      environment
643
  @type override: dict
644
  @param override: dictionary with key/values that will override
645
      our values
646
  @rtype: dict
647
  @return: the hook environment dictionary
648

649
  """
650
  cluster = lu.cfg.GetClusterInfo()
651
  bep = cluster.FillBE(instance)
652
  hvp = cluster.FillHV(instance)
653
  args = {
654
    'name': instance.name,
655
    'primary_node': instance.primary_node,
656
    'secondary_nodes': instance.secondary_nodes,
657
    'os_type': instance.os,
658
    'status': instance.admin_up,
659
    'memory': bep[constants.BE_MEMORY],
660
    'vcpus': bep[constants.BE_VCPUS],
661
    'nics': _NICListToTuple(lu, instance.nics),
662
    'disk_template': instance.disk_template,
663
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
664
    'bep': bep,
665
    'hvp': hvp,
666
    'hypervisor_name': instance.hypervisor,
667
  }
668
  if override:
669
    args.update(override)
670
  return _BuildInstanceHookEnv(**args)
671

    
672

    
673
def _AdjustCandidatePool(lu):
674
  """Adjust the candidate pool after node operations.
675

676
  """
677
  mod_list = lu.cfg.MaintainCandidatePool()
678
  if mod_list:
679
    lu.LogInfo("Promoted nodes to master candidate role: %s",
680
               ", ".join(node.name for node in mod_list))
681
    for name in mod_list:
682
      lu.context.ReaddNode(name)
683
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
684
  if mc_now > mc_max:
685
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
686
               (mc_now, mc_max))
687

    
688

    
689
def _CheckNicsBridgesExist(lu, target_nics, target_node,
690
                               profile=constants.PP_DEFAULT):
691
  """Check that the brigdes needed by a list of nics exist.
692

693
  """
694
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
695
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
696
                for nic in target_nics]
697
  brlist = [params[constants.NIC_LINK] for params in paramslist
698
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
699
  if brlist:
700
    result = lu.rpc.call_bridges_exist(target_node, brlist)
701
    result.Raise("Error checking bridges on destination node '%s'" %
702
                 target_node, prereq=True)
703

    
704

    
705
def _CheckInstanceBridgesExist(lu, instance, node=None):
706
  """Check that the brigdes needed by an instance exist.
707

708
  """
709
  if node is None:
710
    node = instance.primary_node
711
  _CheckNicsBridgesExist(lu, instance.nics, node)
712

    
713

    
714
def _GetNodeInstancesInner(cfg, fn):
715
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
716

    
717

    
718
def _GetNodeInstances(cfg, node_name):
719
  """Returns a list of all primary and secondary instances on a node.
720

721
  """
722

    
723
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
724

    
725

    
726
def _GetNodePrimaryInstances(cfg, node_name):
727
  """Returns primary instances on a node.
728

729
  """
730
  return _GetNodeInstancesInner(cfg,
731
                                lambda inst: node_name == inst.primary_node)
732

    
733

    
734
def _GetNodeSecondaryInstances(cfg, node_name):
735
  """Returns secondary instances on a node.
736

737
  """
738
  return _GetNodeInstancesInner(cfg,
739
                                lambda inst: node_name in inst.secondary_nodes)
740

    
741

    
742
def _GetStorageTypeArgs(cfg, storage_type):
743
  """Returns the arguments for a storage type.
744

745
  """
746
  # Special case for file storage
747
  if storage_type == constants.ST_FILE:
748
    # storage.FileStorage wants a list of storage directories
749
    return [[cfg.GetFileStorageDir()]]
750

    
751
  return []
752

    
753

    
754
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
755
  faulty = []
756

    
757
  for dev in instance.disks:
758
    cfg.SetDiskID(dev, node_name)
759

    
760
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
761
  result.Raise("Failed to get disk status from node %s" % node_name,
762
               prereq=prereq)
763

    
764
  for idx, bdev_status in enumerate(result.payload):
765
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
766
      faulty.append(idx)
767

    
768
  return faulty
769

    
770
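# Illustrative sketch, not part of the original cmdlib.py: how an LU would
# typically call the helper above from CheckPrereq for an already looked-up
# instance.  The returned list holds the indices of the faulty disks, e.g.
# [1] if only the instance's second disk is LDS_FAULTY on that node.
def _ExampleFindFaultyPrimaryDisks(lu, instance):
  return _FindFaultyInstanceDisks(lu.cfg, lu.rpc, instance,
                                  instance.primary_node, prereq=True)
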

    
771
class LUPostInitCluster(LogicalUnit):
772
  """Logical unit for running hooks after cluster initialization.
773

774
  """
775
  HPATH = "cluster-init"
776
  HTYPE = constants.HTYPE_CLUSTER
777
  _OP_REQP = []
778

    
779
  def BuildHooksEnv(self):
780
    """Build hooks env.
781

782
    """
783
    env = {"OP_TARGET": self.cfg.GetClusterName()}
784
    mn = self.cfg.GetMasterNode()
785
    return env, [], [mn]
786

    
787
  def CheckPrereq(self):
788
    """No prerequisites to check.
789

790
    """
791
    return True
792

    
793
  def Exec(self, feedback_fn):
794
    """Nothing to do.
795

796
    """
797
    return True
798

    
799

    
800
class LUDestroyCluster(LogicalUnit):
801
  """Logical unit for destroying the cluster.
802

803
  """
804
  HPATH = "cluster-destroy"
805
  HTYPE = constants.HTYPE_CLUSTER
806
  _OP_REQP = []
807

    
808
  def BuildHooksEnv(self):
809
    """Build hooks env.
810

811
    """
812
    env = {"OP_TARGET": self.cfg.GetClusterName()}
813
    return env, [], []
814

    
815
  def CheckPrereq(self):
816
    """Check prerequisites.
817

818
    This checks whether the cluster is empty.
819

820
    Any errors are signaled by raising errors.OpPrereqError.
821

822
    """
823
    master = self.cfg.GetMasterNode()
824

    
825
    nodelist = self.cfg.GetNodeList()
826
    if len(nodelist) != 1 or nodelist[0] != master:
827
      raise errors.OpPrereqError("There are still %d node(s) in"
828
                                 " this cluster." % (len(nodelist) - 1))
829
    instancelist = self.cfg.GetInstanceList()
830
    if instancelist:
831
      raise errors.OpPrereqError("There are still %d instance(s) in"
832
                                 " this cluster." % len(instancelist))
833

    
834
  def Exec(self, feedback_fn):
835
    """Destroys the cluster.
836

837
    """
838
    master = self.cfg.GetMasterNode()
839

    
840
    # Run post hooks on master node before it's removed
841
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
842
    try:
843
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
844
    except:
845
      self.LogWarning("Errors occurred running hooks on %s" % master)
846

    
847
    result = self.rpc.call_node_stop_master(master, False)
848
    result.Raise("Could not disable the master role")
849
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
850
    utils.CreateBackup(priv_key)
851
    utils.CreateBackup(pub_key)
852
    return master
853

    
854

    
855
class LUVerifyCluster(LogicalUnit):
856
  """Verifies the cluster status.
857

858
  """
859
  HPATH = "cluster-verify"
860
  HTYPE = constants.HTYPE_CLUSTER
861
  _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
862
  REQ_BGL = False
863

    
864
  TCLUSTER = "cluster"
865
  TNODE = "node"
866
  TINSTANCE = "instance"
867

    
868
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
869
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
870
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
871
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
872
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
873
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
874
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
875
  ENODEDRBD = (TNODE, "ENODEDRBD")
876
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
877
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
878
  ENODEHV = (TNODE, "ENODEHV")
879
  ENODELVM = (TNODE, "ENODELVM")
880
  ENODEN1 = (TNODE, "ENODEN1")
881
  ENODENET = (TNODE, "ENODENET")
882
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
883
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
884
  ENODERPC = (TNODE, "ENODERPC")
885
  ENODESSH = (TNODE, "ENODESSH")
886
  ENODEVERSION = (TNODE, "ENODEVERSION")
887

    
888
  ETYPE_FIELD = "code"
889
  ETYPE_ERROR = "ERROR"
890
  ETYPE_WARNING = "WARNING"
891

    
892
  def ExpandNames(self):
893
    self.needed_locks = {
894
      locking.LEVEL_NODE: locking.ALL_SET,
895
      locking.LEVEL_INSTANCE: locking.ALL_SET,
896
    }
897
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
898

    
899
  def _Error(self, ecode, item, msg, *args, **kwargs):
900
    """Format an error message.
901

902
    Based on the opcode's error_codes parameter, either format a
903
    parseable error code, or a simpler error string.
904

905
    This must be called only from Exec and functions called from Exec.
906

907
    """
908
    ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
909
    itype, etxt = ecode
910
    # first complete the msg
911
    if args:
912
      msg = msg % args
913
    # then format the whole message
914
    if self.op.error_codes:
915
      msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
916
    else:
917
      if item:
918
        item = " " + item
919
      else:
920
        item = ""
921
      msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
922
    # and finally report it via the feedback_fn
923
    self._feedback_fn("  - %s" % msg)
924

    
925
  def _ErrorIf(self, cond, *args, **kwargs):
926
    """Log an error message if the passed condition is True.
927

928
    """
929
    cond = bool(cond) or self.op.debug_simulate_errors
930
    if cond:
931
      self._Error(*args, **kwargs)
932
    # only ERROR-level results mark the operation as failed; warnings do not
933
    if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
934
      self.bad = self.bad or cond
935
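  # Illustrative examples (hypothetical node name, not part of the original
  # module) of how _Error/_ErrorIf above render a problem such as
  # (self.ENODELVM, "node1.example.com", "volume group missing"):
  #   with op.error_codes:     ERROR:ENODELVM:node:node1.example.com:volume group missing
  #   without op.error_codes:  ERROR: node node1.example.com: volume group missing
  # Either form is reported through feedback_fn with a leading "  - ".
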

    
936
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
937
                  node_result, master_files, drbd_map, vg_name):
938
    """Run multiple tests against a node.
939

940
    Test list:
941

942
      - compares ganeti version
943
      - checks vg existence and size > 20G
944
      - checks config file checksum
945
      - checks ssh to other nodes
946

947
    @type nodeinfo: L{objects.Node}
948
    @param nodeinfo: the node to check
949
    @param file_list: required list of files
950
    @param local_cksum: dictionary of local files and their checksums
951
    @param node_result: the results from the node
952
    @param master_files: list of files that only masters should have
953
    @param drbd_map: the used drbd minors for this node, in
954
        form of minor: (instance, must_exist) which correspond to instances
955
        and their running status
956
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
957

958
    """
959
    node = nodeinfo.name
960
    _ErrorIf = self._ErrorIf
961

    
962
    # main result, node_result should be a non-empty dict
963
    test = not node_result or not isinstance(node_result, dict)
964
    _ErrorIf(test, self.ENODERPC, node,
965
                  "unable to verify node: no data returned")
966
    if test:
967
      return
968

    
969
    # compares ganeti version
970
    local_version = constants.PROTOCOL_VERSION
971
    remote_version = node_result.get('version', None)
972
    test = not (remote_version and
973
                isinstance(remote_version, (list, tuple)) and
974
                len(remote_version) == 2)
975
    _ErrorIf(test, self.ENODERPC, node,
976
             "connection to node returned invalid data")
977
    if test:
978
      return
979

    
980
    test = local_version != remote_version[0]
981
    _ErrorIf(test, self.ENODEVERSION, node,
982
             "incompatible protocol versions: master %s,"
983
             " node %s", local_version, remote_version[0])
984
    if test:
985
      return
986

    
987
    # node seems compatible, we can actually try to look into its results
988

    
989
    # full package version
990
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
991
                  self.ENODEVERSION, node,
992
                  "software version mismatch: master %s, node %s",
993
                  constants.RELEASE_VERSION, remote_version[1],
994
                  code=self.ETYPE_WARNING)
995

    
996
    # checks vg existence and size > 20G
997
    if vg_name is not None:
998
      vglist = node_result.get(constants.NV_VGLIST, None)
999
      test = not vglist
1000
      _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1001
      if not test:
1002
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1003
                                              constants.MIN_VG_SIZE)
1004
        _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1005

    
1006
    # checks config file checksum
1007

    
1008
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
1009
    test = not isinstance(remote_cksum, dict)
1010
    _ErrorIf(test, self.ENODEFILECHECK, node,
1011
             "node hasn't returned file checksum data")
1012
    if not test:
1013
      for file_name in file_list:
1014
        node_is_mc = nodeinfo.master_candidate
1015
        must_have = (file_name not in master_files) or node_is_mc
1016
        # missing
1017
        test1 = file_name not in remote_cksum
1018
        # invalid checksum
1019
        test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1020
        # existing and good
1021
        test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1022
        _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1023
                 "file '%s' missing", file_name)
1024
        _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1025
                 "file '%s' has wrong checksum", file_name)
1026
        # not candidate and this is not a must-have file
1027
        _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1028
                 "file '%s' should not exist on non master"
1029
                 " candidates (and the file is outdated)", file_name)
1030
        # all good, except non-master/non-must have combination
1031
        _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1032
                 "file '%s' should not exist"
1033
                 " on non master candidates", file_name)
1034

    
1035
    # checks ssh to any
1036

    
1037
    test = constants.NV_NODELIST not in node_result
1038
    _ErrorIf(test, self.ENODESSH, node,
1039
             "node hasn't returned node ssh connectivity data")
1040
    if not test:
1041
      if node_result[constants.NV_NODELIST]:
1042
        for a_node, a_msg in node_result[constants.NV_NODELIST].items():
1043
          _ErrorIf(True, self.ENODESSH, node,
1044
                   "ssh communication with node '%s': %s", a_node, a_msg)
1045

    
1046
    test = constants.NV_NODENETTEST not in node_result
1047
    _ErrorIf(test, self.ENODENET, node,
1048
             "node hasn't returned node tcp connectivity data")
1049
    if not test:
1050
      if node_result[constants.NV_NODENETTEST]:
1051
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
1052
        for anode in nlist:
1053
          _ErrorIf(True, self.ENODENET, node,
1054
                   "tcp communication with node '%s': %s",
1055
                   anode, node_result[constants.NV_NODENETTEST][anode])
1056

    
1057
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
1058
    if isinstance(hyp_result, dict):
1059
      for hv_name, hv_result in hyp_result.iteritems():
1060
        test = hv_result is not None
1061
        _ErrorIf(test, self.ENODEHV, node,
1062
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1063

    
1064
    # check used drbd list
1065
    if vg_name is not None:
1066
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
1067
      test = not isinstance(used_minors, (tuple, list))
1068
      _ErrorIf(test, self.ENODEDRBD, node,
1069
               "cannot parse drbd status file: %s", str(used_minors))
1070
      if not test:
1071
        for minor, (iname, must_exist) in drbd_map.items():
1072
          test = minor not in used_minors and must_exist
1073
          _ErrorIf(test, self.ENODEDRBD, node,
1074
                   "drbd minor %d of instance %s is not active",
1075
                   minor, iname)
1076
        for minor in used_minors:
1077
          test = minor not in drbd_map
1078
          _ErrorIf(test, self.ENODEDRBD, node,
1079
                   "unallocated drbd minor %d is in use", minor)
1080

    
1081
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1082
                      node_instance, n_offline):
1083
    """Verify an instance.
1084

1085
    This function checks to see if the required block devices are
1086
    available on the instance's node.
1087

1088
    """
1089
    _ErrorIf = self._ErrorIf
1090
    node_current = instanceconfig.primary_node
1091

    
1092
    node_vol_should = {}
1093
    instanceconfig.MapLVsByNode(node_vol_should)
1094

    
1095
    for node in node_vol_should:
1096
      if node in n_offline:
1097
        # ignore missing volumes on offline nodes
1098
        continue
1099
      for volume in node_vol_should[node]:
1100
        test = node not in node_vol_is or volume not in node_vol_is[node]
1101
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1102
                 "volume %s missing on node %s", volume, node)
1103

    
1104
    if instanceconfig.admin_up:
1105
      test = ((node_current not in node_instance or
1106
               not instance in node_instance[node_current]) and
1107
              node_current not in n_offline)
1108
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
1109
               "instance not running on its primary node %s",
1110
               node_current)
1111

    
1112
    for node in node_instance:
1113
      if (not node == node_current):
1114
        test = instance in node_instance[node]
1115
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1116
                 "instance should not run on node %s", node)
1117

    
1118
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is):
1119
    """Verify if there are any unknown volumes in the cluster.
1120

1121
    The .os, .swap and backup volumes are ignored. All other volumes are
1122
    reported as unknown.
1123

1124
    """
1125
    for node in node_vol_is:
1126
      for volume in node_vol_is[node]:
1127
        test = (node not in node_vol_should or
1128
                volume not in node_vol_should[node])
1129
        self._ErrorIf(test, self.ENODEORPHANLV, node,
1130
                      "volume %s is unknown", volume)
1131

    
1132
  def _VerifyOrphanInstances(self, instancelist, node_instance):
1133
    """Verify the list of running instances.
1134

1135
    This checks what instances are running but unknown to the cluster.
1136

1137
    """
1138
    for node in node_instance:
1139
      for o_inst in node_instance[node]:
1140
        test = o_inst not in instancelist
1141
        self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1142
                      "instance %s on node %s should not exist", o_inst, node)
1143

    
1144
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg):
1145
    """Verify N+1 Memory Resilience.
1146

1147
    Check that if one single node dies we can still start all the instances it
1148
    was primary for.
1149

1150
    """
1151
    for node, nodeinfo in node_info.iteritems():
1152
      # This code checks that every node which is now listed as secondary has
1153
      # enough memory to host all instances it is supposed to, should a single
1154
      # other node in the cluster fail.
1155
      # FIXME: not ready for failover to an arbitrary node
1156
      # FIXME: does not support file-backed instances
1157
      # WARNING: we currently take into account down instances as well as up
1158
      # ones, considering that even if they're down someone might want to start
1159
      # them even in the event of a node failure.
1160
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1161
        needed_mem = 0
1162
        for instance in instances:
1163
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1164
          if bep[constants.BE_AUTO_BALANCE]:
1165
            needed_mem += bep[constants.BE_MEMORY]
1166
        test = nodeinfo['mfree'] < needed_mem
1167
        self._ErrorIf(test, self.ENODEN1, node,
1168
                      "not enough memory on to accommodate"
1169
                      " failovers should peer node %s fail", prinode)
1170
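  # Worked example for the N+1 check above (hypothetical numbers, not part of
  # the original module): if node A holds the secondaries of two auto-balanced
  # instances whose primary is node B, with 2048 and 1024 MB of BE memory,
  # node A needs at least 3072 MB free; otherwise an ENODEN1 error is reported
  # for node A, naming peer node B.
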

    
1171
  def CheckPrereq(self):
1172
    """Check prerequisites.
1173

1174
    Transform the list of checks we're going to skip into a set and check that
1175
    all its members are valid.
1176

1177
    """
1178
    self.skip_set = frozenset(self.op.skip_checks)
1179
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1180
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1181

    
1182
  def BuildHooksEnv(self):
1183
    """Build hooks env.
1184

1185
    Cluster-Verify hooks are run only in the post phase, and their failure causes
1186
    the output to be logged in the verify output and the verification to fail.
1187

1188
    """
1189
    all_nodes = self.cfg.GetNodeList()
1190
    env = {
1191
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1192
      }
1193
    for node in self.cfg.GetAllNodesInfo().values():
1194
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1195

    
1196
    return env, [], all_nodes
1197

    
1198
  def Exec(self, feedback_fn):
1199
    """Verify integrity of cluster, performing various test on nodes.
1200

1201
    """
1202
    self.bad = False
1203
    _ErrorIf = self._ErrorIf
1204
    verbose = self.op.verbose
1205
    self._feedback_fn = feedback_fn
1206
    feedback_fn("* Verifying global settings")
1207
    for msg in self.cfg.VerifyConfig():
1208
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1209

    
1210
    vg_name = self.cfg.GetVGName()
1211
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1212
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1213
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1214
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1215
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1216
                        for iname in instancelist)
1217
    i_non_redundant = [] # Non redundant instances
1218
    i_non_a_balanced = [] # Non auto-balanced instances
1219
    n_offline = [] # List of offline nodes
1220
    n_drained = [] # List of nodes being drained
1221
    node_volume = {}
1222
    node_instance = {}
1223
    node_info = {}
1224
    instance_cfg = {}
1225

    
1226
    # FIXME: verify OS list
1227
    # do local checksums
1228
    master_files = [constants.CLUSTER_CONF_FILE]
1229

    
1230
    file_names = ssconf.SimpleStore().GetFileList()
1231
    file_names.append(constants.SSL_CERT_FILE)
1232
    file_names.append(constants.RAPI_CERT_FILE)
1233
    file_names.extend(master_files)
1234

    
1235
    local_checksums = utils.FingerprintFiles(file_names)
1236

    
1237
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1238
    node_verify_param = {
1239
      constants.NV_FILELIST: file_names,
1240
      constants.NV_NODELIST: [node.name for node in nodeinfo
1241
                              if not node.offline],
1242
      constants.NV_HYPERVISOR: hypervisors,
1243
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1244
                                  node.secondary_ip) for node in nodeinfo
1245
                                 if not node.offline],
1246
      constants.NV_INSTANCELIST: hypervisors,
1247
      constants.NV_VERSION: None,
1248
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1249
      }
1250
    if vg_name is not None:
1251
      node_verify_param[constants.NV_VGLIST] = None
1252
      node_verify_param[constants.NV_LVLIST] = vg_name
1253
      node_verify_param[constants.NV_DRBDLIST] = None
1254
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1255
                                           self.cfg.GetClusterName())
1256

    
1257
    cluster = self.cfg.GetClusterInfo()
1258
    master_node = self.cfg.GetMasterNode()
1259
    all_drbd_map = self.cfg.ComputeDRBDMap()
1260

    
1261
    feedback_fn("* Verifying node status")
1262
    for node_i in nodeinfo:
1263
      node = node_i.name
1264

    
1265
      if node_i.offline:
1266
        if verbose:
1267
          feedback_fn("* Skipping offline node %s" % (node,))
1268
        n_offline.append(node)
1269
        continue
1270

    
1271
      if node == master_node:
1272
        ntype = "master"
1273
      elif node_i.master_candidate:
1274
        ntype = "master candidate"
1275
      elif node_i.drained:
1276
        ntype = "drained"
1277
        n_drained.append(node)
1278
      else:
1279
        ntype = "regular"
1280
      if verbose:
1281
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1282

    
1283
      msg = all_nvinfo[node].fail_msg
1284
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1285
      if msg:
1286
        continue
1287

    
1288
      nresult = all_nvinfo[node].payload
1289
      node_drbd = {}
1290
      for minor, instance in all_drbd_map[node].items():
1291
        test = instance not in instanceinfo
1292
        _ErrorIf(test, self.ECLUSTERCFG, None,
1293
                 "ghost instance '%s' in temporary DRBD map", instance)
1294
          # ghost instance should not be running, but otherwise we
1295
          # don't give double warnings (both ghost instance and
1296
          # unallocated minor in use)
1297
        if test:
1298
          node_drbd[minor] = (instance, False)
1299
        else:
1300
          instance = instanceinfo[instance]
1301
          node_drbd[minor] = (instance.name, instance.admin_up)
1302
      self._VerifyNode(node_i, file_names, local_checksums,
1303
                       nresult, master_files, node_drbd, vg_name)
1304

    
1305
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1306
      if vg_name is None:
1307
        node_volume[node] = {}
1308
      elif isinstance(lvdata, basestring):
1309
        _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1310
                 utils.SafeEncode(lvdata))
1311
        node_volume[node] = {}
1312
      elif not isinstance(lvdata, dict):
1313
        _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1314
        continue
1315
      else:
1316
        node_volume[node] = lvdata
1317

    
1318
      # node_instance
1319
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1320
      test = not isinstance(idata, list)
1321
      _ErrorIf(test, self.ENODEHV, node,
1322
               "rpc call to node failed (instancelist)")
1323
      if test:
1324
        continue
1325

    
1326
      node_instance[node] = idata
1327

    
1328
      # node_info
1329
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1330
      test = not isinstance(nodeinfo, dict)
1331
      _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1332
      if test:
1333
        continue
1334

    
1335
      try:
1336
        node_info[node] = {
1337
          "mfree": int(nodeinfo['memory_free']),
1338
          "pinst": [],
1339
          "sinst": [],
1340
          # dictionary holding all instances this node is secondary for,
1341
          # grouped by their primary node. Each key is a cluster node, and each
1342
          # value is a list of instances which have the key as primary and the
1343
          # current node as secondary.  this is handy to calculate N+1 memory
1344
          # availability if you can only failover from a primary to its
1345
          # secondary.
1346
          "sinst-by-pnode": {},
1347
        }
1348
        # FIXME: devise a free space model for file based instances as well
1349
        if vg_name is not None:
1350
          test = (constants.NV_VGLIST not in nresult or
1351
                  vg_name not in nresult[constants.NV_VGLIST])
1352
          _ErrorIf(test, self.ENODELVM, node,
1353
                   "node didn't return data for the volume group '%s'"
1354
                   " - it is either missing or broken", vg_name)
1355
          if test:
1356
            continue
1357
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1358
      except (ValueError, KeyError):
1359
        _ErrorIf(True, self.ENODERPC, node,
1360
                 "node returned invalid nodeinfo, check lvm/hypervisor")
1361
        continue
1362

    
1363
    node_vol_should = {}
1364

    
1365
    feedback_fn("* Verifying instance status")
1366
    for instance in instancelist:
1367
      if verbose:
1368
        feedback_fn("* Verifying instance %s" % instance)
1369
      inst_config = instanceinfo[instance]
1370
      self._VerifyInstance(instance, inst_config, node_volume,
                           node_instance, n_offline)
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      _ErrorIf(pnode not in node_info and pnode not in n_offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1,
               self.EINSTANCELAYOUT, instance,
               "instance has multiple secondary nodes", code="WARNING")

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        _ErrorIf(snode not in node_info and snode not in n_offline,
                 self.ENODERPC, snode,
                 "instance %s, connection to secondary node"
                 " failed", instance)

        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)

        if snode in n_offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               ", ".join(inst_nodes_offline))

    feedback_fn("* Verifying orphan volumes")
    self._VerifyOrphanVolumes(node_vol_should, node_volume)

    feedback_fn("* Verifying remaining instances")
    self._VerifyOrphanInstances(instancelist, node_instance)

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_info, instance_cfg)

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not self.bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      assert hooks_results, "invalid result from hooks"

      for node_name in hooks_results:
        show_node_header = True
        res = hooks_results[node_name]
        msg = res.fail_msg
        test = msg and not res.offline
        self._ErrorIf(test, self.ENODEHOOKS, node_name,
                      "Communication failure in hooks execution: %s", msg)
        if test:
          # override manually lu_result here as _ErrorIf only
          # overrides self.bad
          lu_result = 1
          continue
        for script, hkr, output in res.payload:
          test = hkr == constants.HKR_FAIL
          self._ErrorIf(test, self.ENODEHOOKS, node_name,
                        "Script %s failed, output:", script)
          if test:
            output = indent_re.sub('      ', output)
            feedback_fn("%s" % output)
            lu_result = 1

      return lu_result

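# Illustrative note, not part of the original module: HooksCallBack above
# consumes per-node results whose payload is a list of
# (script, status, output) tuples; a failing entry might look like
#   ("50ganeti-check", constants.HKR_FAIL, "some check failed\n")
# where the script name and output text are invented here purely as an
# example.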

class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result

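# Hedged example, not part of the original file: the tuple returned by
# LUVerifyDisks.Exec above could look like
#   ({"node3.example.com": "Error enumerating LVs ..."},  # per-node errors
#    ["instance1.example.com"],                           # need activate-disks
#    {"instance2.example.com": [("node2.example.com", "disk0-lv")]})
# the node, instance and volume names here are invented for illustration
# only; the real volume names come from MapLVsByNode.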

class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):
    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance)
          changed.append((instance.name, idx, size))
    return changed

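# Clarifying note (added, not original): the "size >> 20" in
# LURepairDiskSizes.Exec above converts the byte counts returned by the
# blockdev_getsizes RPC into mebibytes, the unit recorded in disk.size;
# for example 10737418240 bytes >> 20 == 10240 MiB.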

class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

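# Usage note (hedged, not in the original source): LURenameCluster above is
# normally driven by "gnt-cluster rename <new-name>"; it stops the master IP,
# rewrites cluster_name/master_ip in the configuration, pushes the updated
# known_hosts file to the other nodes and finally restarts the master role.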

def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV

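# Illustrative sketch (assumption, not original code): for a DRBD8 disk whose
# children are two plain LVs, _RecursiveCheckIfLVMBased returns True because
# the recursion reaches a child with dev_type == constants.LD_LV, e.g.:
#   lv_a = objects.Disk(dev_type=constants.LD_LV, size=1024, children=[])
#   lv_b = objects.Disk(dev_type=constants.LD_LV, size=1024, children=[])
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8, size=1024,
#                       children=[lv_a, lv_b])
#   assert _RecursiveCheckIfLVMBased(drbd)
# The constructor arguments are abbreviated; real Disk objects carry more
# fields (logical_id, iv_name, ...).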

class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err))
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed")

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist")

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus))

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member")
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs))
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self)

    self.cfg.Update(self.cluster)

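# Hedged usage sketch (not part of the original file): LUSetClusterParams
# above is typically reached through a cluster-modify opcode built by
# "gnt-cluster modify", conceptually something along the lines of
#   op = opcodes.OpSetClusterParams(vg_name="xenvg", candidate_pool_size=10,
#                                   enabled_hypervisors=None, hvparams=None,
#                                   beparams=None, nicparams=None)
# the exact opcode class name and its fields are assumptions here; they
# mirror the self.op attributes the LU reads (vg_name, enabled_hypervisors,
# hvparams, beparams, nicparams, candidate_pool_size).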

def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.HMAC_CLUSTER_KEY,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)

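# Clarifying note (added): the os.path.exists() guard in step 3 of
# _RedistributeAncillaryFiles above means optional files (for example the
# RAPI users file) are silently skipped on clusters where they were never
# created, instead of producing per-node upload failures.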

class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo())
    _RedistributeAncillaryFiles(self)

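# Usage note (hedged, not in the original source): LURedistributeConfig above
# backs gnt-cluster's configuration redistribution command; calling
# cfg.Update() on the cluster object makes ConfigWriter push the config and
# ssconf files, while _RedistributeAncillaryFiles copies the remaining files.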

def _WaitForSync(lu, instance, oneshot=False, unlock=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded

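# Clarifying note (added, not original): between polls _WaitForSync above
# sleeps min(60, max_time) seconds, where max_time is the largest per-device
# sync estimate seen in the last poll; e.g. with an estimated_time of 300s
# the next poll still happens after 60 seconds. Consecutive RPC failures are
# retried with a six-second pause and the helper gives up after ten of them.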

def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
2103
  """Logical unit for OS diagnose/query.
2104

2105
  """
2106
  _OP_REQP = ["output_fields", "names"]
2107
  REQ_BGL = False
2108
  _FIELDS_STATIC = utils.FieldSet()
2109
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
2110

    
2111
  def ExpandNames(self):
2112
    if self.op.names:
2113
      raise errors.OpPrereqError("Selective OS query not supported")
2114

    
2115
    _CheckOutputFields(static=self._FIELDS_STATIC,
2116
                       dynamic=self._FIELDS_DYNAMIC,
2117
                       selected=self.op.output_fields)
2118

    
2119
    # Lock all nodes, in shared mode
2120
    # Temporary removal of locks, should be reverted later
2121
    # TODO: reintroduce locks when they are lighter-weight
2122
    self.needed_locks = {}
2123
    #self.share_locks[locking.LEVEL_NODE] = 1
2124
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2125

    
2126
  def CheckPrereq(self):
2127
    """Check prerequisites.
2128

2129
    """
2130

    
2131
  @staticmethod
2132
  def _DiagnoseByOS(node_list, rlist):
2133
    """Remaps a per-node return list into an a per-os per-node dictionary
2134

2135
    @param node_list: a list with the names of all nodes
2136
    @param rlist: a map with node names as keys and OS objects as values
2137

2138
    @rtype: dict
2139
    @return: a dictionary with osnames as keys and as value another map, with
2140
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2141

2142
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2143
                                     (/srv/..., False, "invalid api")],
2144
                           "node2": [(/srv/..., True, "")]}
2145
          }
2146

2147
    """
2148
    all_os = {}
2149
    # we build here the list of nodes that didn't fail the RPC (at RPC
2150
    # level), so that nodes with a non-responding node daemon don't
2151
    # make all OSes invalid
2152
    good_nodes = [node_name for node_name in rlist
2153
                  if not rlist[node_name].fail_msg]
2154
    for node_name, nr in rlist.items():
2155
      if nr.fail_msg or not nr.payload:
2156
        continue
2157
      for name, path, status, diagnose in nr.payload:
2158
        if name not in all_os:
2159
          # build a list of nodes for this os containing empty lists
2160
          # for each node in node_list
2161
          all_os[name] = {}
2162
          for nname in good_nodes:
2163
            all_os[name][nname] = []
2164
        all_os[name][node_name].append((path, status, diagnose))
2165
    return all_os
2166

    
2167
  def Exec(self, feedback_fn):
2168
    """Compute the list of OSes.
2169

2170
    """
2171
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2172
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2173
    pol = self._DiagnoseByOS(valid_nodes, node_data)
2174
    output = []
2175
    for os_name, os_data in pol.items():
2176
      row = []
2177
      for field in self.op.output_fields:
2178
        if field == "name":
2179
          val = os_name
2180
        elif field == "valid":
2181
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
2182
        elif field == "node_status":
2183
          # this is just a copy of the dict
2184
          val = {}
2185
          for node_name, nos_list in os_data.items():
2186
            val[node_name] = nos_list
2187
        else:
2188
          raise errors.ParameterError(field)
2189
        row.append(val)
2190
      output.append(row)
2191

    
2192
    return output
2193

    
2194

    
2195
class LURemoveNode(LogicalUnit):
2196
  """Logical unit for removing a node.
2197

2198
  """
2199
  HPATH = "node-remove"
2200
  HTYPE = constants.HTYPE_NODE
2201
  _OP_REQP = ["node_name"]
2202

    
2203
  def BuildHooksEnv(self):
2204
    """Build hooks env.
2205

2206
    This doesn't run on the target node in the pre phase as a failed
2207
    node would then be impossible to remove.
2208

2209
    """
2210
    env = {
2211
      "OP_TARGET": self.op.node_name,
2212
      "NODE_NAME": self.op.node_name,
2213
      }
2214
    all_nodes = self.cfg.GetNodeList()
2215
    if self.op.node_name in all_nodes:
2216
      all_nodes.remove(self.op.node_name)
2217
    return env, all_nodes, all_nodes
2218

    
2219
  def CheckPrereq(self):
2220
    """Check prerequisites.
2221

2222
    This checks:
2223
     - the node exists in the configuration
2224
     - it does not have primary or secondary instances
2225
     - it's not the master
2226

2227
    Any errors are signaled by raising errors.OpPrereqError.
2228

2229
    """
2230
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2231
    if node is None:
2232
      raise errors.OpPrereqError, ("Node '%s' is unknown." % self.op.node_name)
2233

    
2234
    instance_list = self.cfg.GetInstanceList()
2235

    
2236
    masternode = self.cfg.GetMasterNode()
2237
    if node.name == masternode:
2238
      raise errors.OpPrereqError("Node is the master node,"
2239
                                 " you need to failover first.")
2240

    
2241
    for instance_name in instance_list:
2242
      instance = self.cfg.GetInstanceInfo(instance_name)
2243
      if node.name in instance.all_nodes:
2244
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2245
                                   " please remove first." % instance_name)
2246
    self.op.node_name = node.name
2247
    self.node = node
2248

    
2249
  def Exec(self, feedback_fn):
2250
    """Removes the node from the cluster.
2251

2252
    """
2253
    node = self.node
2254
    logging.info("Stopping the node daemon and removing configs from node %s",
2255
                 node.name)
2256

    
2257
    self.context.RemoveNode(node.name)
2258

    
2259
    # Run post hooks on the node before it's removed
2260
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2261
    try:
2262
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2263
    except:
2264
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2265

    
2266
    result = self.rpc.call_node_leave_cluster(node.name)
2267
    msg = result.fail_msg
2268
    if msg:
2269
      self.LogWarning("Errors encountered on the remote node while leaving"
2270
                      " the cluster: %s", msg)
2271

    
2272
    # Promote nodes to master candidate as needed
2273
    _AdjustCandidatePool(self)
2274

    
2275

    
2276
class LUQueryNodes(NoHooksLU):
2277
  """Logical unit for querying nodes.
2278

2279
  """
2280
  _OP_REQP = ["output_fields", "names", "use_locking"]
2281
  REQ_BGL = False
2282
  _FIELDS_DYNAMIC = utils.FieldSet(
2283
    "dtotal", "dfree",
2284
    "mtotal", "mnode", "mfree",
2285
    "bootid",
2286
    "ctotal", "cnodes", "csockets",
2287
    )
2288

    
2289
  _FIELDS_STATIC = utils.FieldSet(
2290
    "name", "pinst_cnt", "sinst_cnt",
2291
    "pinst_list", "sinst_list",
2292
    "pip", "sip", "tags",
2293
    "serial_no", "ctime", "mtime",
2294
    "master_candidate",
2295
    "master",
2296
    "offline",
2297
    "drained",
2298
    "role",
2299
    )
2300

    
2301
  def ExpandNames(self):
2302
    _CheckOutputFields(static=self._FIELDS_STATIC,
2303
                       dynamic=self._FIELDS_DYNAMIC,
2304
                       selected=self.op.output_fields)
2305

    
2306
    self.needed_locks = {}
2307
    self.share_locks[locking.LEVEL_NODE] = 1
2308

    
2309
    if self.op.names:
2310
      self.wanted = _GetWantedNodes(self, self.op.names)
2311
    else:
2312
      self.wanted = locking.ALL_SET
2313

    
2314
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2315
    self.do_locking = self.do_node_query and self.op.use_locking
2316
    if self.do_locking:
2317
      # if we don't request only static fields, we need to lock the nodes
2318
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2319

    
2320

    
2321
  def CheckPrereq(self):
2322
    """Check prerequisites.
2323

2324
    """
2325
    # The validation of the node list is done in the _GetWantedNodes,
2326
    # if non empty, and if empty, there's no validation to do
2327
    pass
2328

    
2329
  def Exec(self, feedback_fn):
2330
    """Computes the list of nodes and their attributes.
2331

2332
    """
2333
    all_info = self.cfg.GetAllNodesInfo()
2334
    if self.do_locking:
2335
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2336
    elif self.wanted != locking.ALL_SET:
2337
      nodenames = self.wanted
2338
      missing = set(nodenames).difference(all_info.keys())
2339
      if missing:
2340
        raise errors.OpExecError(
2341
          "Some nodes were removed before retrieving their data: %s" % missing)
2342
    else:
2343
      nodenames = all_info.keys()
2344

    
2345
    nodenames = utils.NiceSort(nodenames)
2346
    nodelist = [all_info[name] for name in nodenames]
2347

    
2348
    # begin data gathering
2349

    
2350
    if self.do_node_query:
2351
      live_data = {}
2352
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2353
                                          self.cfg.GetHypervisorType())
2354
      for name in nodenames:
2355
        nodeinfo = node_data[name]
2356
        if not nodeinfo.fail_msg and nodeinfo.payload:
2357
          nodeinfo = nodeinfo.payload
2358
          fn = utils.TryConvert
2359
          live_data[name] = {
2360
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2361
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2362
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2363
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2364
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2365
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2366
            "bootid": nodeinfo.get('bootid', None),
2367
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2368
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2369
            }
2370
        else:
2371
          live_data[name] = {}
2372
    else:
2373
      live_data = dict.fromkeys(nodenames, {})
2374

    
2375
    node_to_primary = dict([(name, set()) for name in nodenames])
2376
    node_to_secondary = dict([(name, set()) for name in nodenames])
2377

    
2378
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2379
                             "sinst_cnt", "sinst_list"))
2380
    if inst_fields & frozenset(self.op.output_fields):
2381
      instancelist = self.cfg.GetInstanceList()
2382

    
2383
      for instance_name in instancelist:
2384
        inst = self.cfg.GetInstanceInfo(instance_name)
2385
        if inst.primary_node in node_to_primary:
2386
          node_to_primary[inst.primary_node].add(inst.name)
2387
        for secnode in inst.secondary_nodes:
2388
          if secnode in node_to_secondary:
2389
            node_to_secondary[secnode].add(inst.name)
2390

    
2391
    master_node = self.cfg.GetMasterNode()
2392

    
2393
    # end data gathering
2394

    
2395
    output = []
2396
    for node in nodelist:
2397
      node_output = []
2398
      for field in self.op.output_fields:
2399
        if field == "name":
2400
          val = node.name
2401
        elif field == "pinst_list":
2402
          val = list(node_to_primary[node.name])
2403
        elif field == "sinst_list":
2404
          val = list(node_to_secondary[node.name])
2405
        elif field == "pinst_cnt":
2406
          val = len(node_to_primary[node.name])
2407
        elif field == "sinst_cnt":
2408
          val = len(node_to_secondary[node.name])
2409
        elif field == "pip":
2410
          val = node.primary_ip
2411
        elif field == "sip":
2412
          val = node.secondary_ip
2413
        elif field == "tags":
2414
          val = list(node.GetTags())
2415
        elif field == "serial_no":
2416
          val = node.serial_no
2417
        elif field == "ctime":
2418
          val = node.ctime
2419
        elif field == "mtime":
2420
          val = node.mtime
2421
        elif field == "master_candidate":
2422
          val = node.master_candidate
2423
        elif field == "master":
2424
          val = node.name == master_node
2425
        elif field == "offline":
2426
          val = node.offline
2427
        elif field == "drained":
2428
          val = node.drained
2429
        elif self._FIELDS_DYNAMIC.Matches(field):
2430
          val = live_data[node.name].get(field, None)
2431
        elif field == "role":
2432
          if node.name == master_node:
2433
            val = "M"
2434
          elif node.master_candidate:
2435
            val = "C"
2436
          elif node.drained:
2437
            val = "D"
2438
          elif node.offline:
2439
            val = "O"
2440
          else:
2441
            val = "R"
2442
        else:
2443
          raise errors.ParameterError(field)
2444
        node_output.append(val)
2445
      output.append(node_output)
2446

    
2447
    return output
2448

    
2449

    
2450
class LUQueryNodeVolumes(NoHooksLU):
2451
  """Logical unit for getting volumes on node(s).
2452

2453
  """
2454
  _OP_REQP = ["nodes", "output_fields"]
2455
  REQ_BGL = False
2456
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2457
  _FIELDS_STATIC = utils.FieldSet("node")
2458

    
2459
  def ExpandNames(self):
2460
    _CheckOutputFields(static=self._FIELDS_STATIC,
2461
                       dynamic=self._FIELDS_DYNAMIC,
2462
                       selected=self.op.output_fields)
2463

    
2464
    self.needed_locks = {}
2465
    self.share_locks[locking.LEVEL_NODE] = 1
2466
    if not self.op.nodes:
2467
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2468
    else:
2469
      self.needed_locks[locking.LEVEL_NODE] = \
2470
        _GetWantedNodes(self, self.op.nodes)
2471

    
2472
  def CheckPrereq(self):
2473
    """Check prerequisites.
2474

2475
    This checks that the fields required are valid output fields.
2476

2477
    """
2478
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2479

    
2480
  def Exec(self, feedback_fn):
2481
    """Computes the list of nodes and their attributes.
2482

2483
    """
2484
    nodenames = self.nodes
2485
    volumes = self.rpc.call_node_volumes(nodenames)
2486

    
2487
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2488
             in self.cfg.GetInstanceList()]
2489

    
2490
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2491

    
2492
    output = []
2493
    for node in nodenames:
2494
      nresult = volumes[node]
2495
      if nresult.offline:
2496
        continue
2497
      msg = nresult.fail_msg
2498
      if msg:
2499
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2500
        continue
2501

    
2502
      node_vols = nresult.payload[:]
2503
      node_vols.sort(key=lambda vol: vol['dev'])
2504

    
2505
      for vol in node_vols:
2506
        node_output = []
2507
        for field in self.op.output_fields:
2508
          if field == "node":
2509
            val = node
2510
          elif field == "phys":
2511
            val = vol['dev']
2512
          elif field == "vg":
2513
            val = vol['vg']
2514
          elif field == "name":
2515
            val = vol['name']
2516
          elif field == "size":
2517
            val = int(float(vol['size']))
2518
          elif field == "instance":
2519
            for inst in ilist:
2520
              if node not in lv_by_node[inst]:
2521
                continue
2522
              if vol['name'] in lv_by_node[inst][node]:
2523
                val = inst.name
2524
                break
2525
            else:
2526
              val = '-'
2527
          else:
2528
            raise errors.ParameterError(field)
2529
          node_output.append(str(val))
2530

    
2531
        output.append(node_output)
2532

    
2533
    return output
2534

    
2535

    
2536
class LUQueryNodeStorage(NoHooksLU):
2537
  """Logical unit for getting information on storage units on node(s).
2538

2539
  """
2540
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2541
  REQ_BGL = False
2542
  _FIELDS_STATIC = utils.FieldSet("node")
2543

    
2544
  def ExpandNames(self):
2545
    storage_type = self.op.storage_type
2546

    
2547
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2548
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2549

    
2550
    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]
2551

    
2552
    _CheckOutputFields(static=self._FIELDS_STATIC,
2553
                       dynamic=utils.FieldSet(*dynamic_fields),
2554
                       selected=self.op.output_fields)
2555

    
2556
    self.needed_locks = {}
2557
    self.share_locks[locking.LEVEL_NODE] = 1
2558

    
2559
    if self.op.nodes:
2560
      self.needed_locks[locking.LEVEL_NODE] = \
2561
        _GetWantedNodes(self, self.op.nodes)
2562
    else:
2563
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2564

    
2565
  def CheckPrereq(self):
2566
    """Check prerequisites.
2567

2568
    This checks that the fields required are valid output fields.
2569

2570
    """
2571
    self.op.name = getattr(self.op, "name", None)
2572

    
2573
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2574

    
2575
  def Exec(self, feedback_fn):
2576
    """Computes the list of nodes and their attributes.
2577

2578
    """
2579
    # Always get name to sort by
2580
    if constants.SF_NAME in self.op.output_fields:
2581
      fields = self.op.output_fields[:]
2582
    else:
2583
      fields = [constants.SF_NAME] + self.op.output_fields
2584

    
2585
    # Never ask for node as it's only known to the LU
2586
    while "node" in fields:
2587
      fields.remove("node")
2588

    
2589
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2590
    name_idx = field_idx[constants.SF_NAME]
2591

    
2592
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2593
    data = self.rpc.call_storage_list(self.nodes,
2594
                                      self.op.storage_type, st_args,
2595
                                      self.op.name, fields)
2596

    
2597
    result = []
2598

    
2599
    for node in utils.NiceSort(self.nodes):
2600
      nresult = data[node]
2601
      if nresult.offline:
2602
        continue
2603

    
2604
      msg = nresult.fail_msg
2605
      if msg:
2606
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2607
        continue
2608

    
2609
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2610

    
2611
      for name in utils.NiceSort(rows.keys()):
2612
        row = rows[name]
2613

    
2614
        out = []
2615

    
2616
        for field in self.op.output_fields:
2617
          if field == "node":
2618
            val = node
2619
          elif field in field_idx:
2620
            val = row[field_idx[field]]
2621
          else:
2622
            raise errors.ParameterError(field)
2623

    
2624
          out.append(val)
2625

    
2626
        result.append(out)
2627

    
2628
    return result
2629

    
2630

    
2631
class LUModifyNodeStorage(NoHooksLU):
2632
  """Logical unit for modifying a storage volume on a node.
2633

2634
  """
2635
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2636
  REQ_BGL = False
2637

    
2638
  def CheckArguments(self):
2639
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2640
    if node_name is None:
2641
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2642

    
2643
    self.op.node_name = node_name
2644

    
2645
    storage_type = self.op.storage_type
2646
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2647
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2648

    
2649
  def ExpandNames(self):
2650
    self.needed_locks = {
2651
      locking.LEVEL_NODE: self.op.node_name,
2652
      }
2653

    
2654
  def CheckPrereq(self):
2655
    """Check prerequisites.
2656

2657
    """
2658
    storage_type = self.op.storage_type
2659

    
2660
    try:
2661
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2662
    except KeyError:
2663
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2664
                                 " modified" % storage_type)
2665

    
2666
    diff = set(self.op.changes.keys()) - modifiable
2667
    if diff:
2668
      raise errors.OpPrereqError("The following fields can not be modified for"
2669
                                 " storage units of type '%s': %r" %
2670
                                 (storage_type, list(diff)))
2671

    
2672
  def Exec(self, feedback_fn):
2673
    """Computes the list of nodes and their attributes.
2674

2675
    """
2676
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2677
    result = self.rpc.call_storage_modify(self.op.node_name,
2678
                                          self.op.storage_type, st_args,
2679
                                          self.op.name, self.op.changes)
2680
    result.Raise("Failed to modify storage unit '%s' on %s" %
2681
                 (self.op.name, self.op.node_name))
2682

    
2683

    
2684
class LUAddNode(LogicalUnit):
2685
  """Logical unit for adding node to the cluster.
2686

2687
  """
2688
  HPATH = "node-add"
2689
  HTYPE = constants.HTYPE_NODE
2690
  _OP_REQP = ["node_name"]
2691

    
2692
  def BuildHooksEnv(self):
2693
    """Build hooks env.
2694

2695
    This will run on all nodes before, and on all nodes + the new node after.
2696

2697
    """
2698
    env = {
2699
      "OP_TARGET": self.op.node_name,
2700
      "NODE_NAME": self.op.node_name,
2701
      "NODE_PIP": self.op.primary_ip,
2702
      "NODE_SIP": self.op.secondary_ip,
2703
      }
2704
    nodes_0 = self.cfg.GetNodeList()
2705
    nodes_1 = nodes_0 + [self.op.node_name, ]
2706
    return env, nodes_0, nodes_1
2707

    
2708
  def CheckPrereq(self):
2709
    """Check prerequisites.
2710

2711
    This checks:
2712
     - the new node is not already in the config
2713
     - it is resolvable
2714
     - its parameters (single/dual homed) matches the cluster
2715

2716
    Any errors are signaled by raising errors.OpPrereqError.
2717

2718
    """
2719
    node_name = self.op.node_name
2720
    cfg = self.cfg
2721

    
2722
    dns_data = utils.HostInfo(node_name)
2723

    
2724
    node = dns_data.name
2725
    primary_ip = self.op.primary_ip = dns_data.ip
2726
    secondary_ip = getattr(self.op, "secondary_ip", None)
2727
    if secondary_ip is None:
2728
      secondary_ip = primary_ip
2729
    if not utils.IsValidIP(secondary_ip):
2730
      raise errors.OpPrereqError("Invalid secondary IP given")
2731
    self.op.secondary_ip = secondary_ip
2732

    
2733
    node_list = cfg.GetNodeList()
2734
    if not self.op.readd and node in node_list:
2735
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2736
                                 node)
2737
    elif self.op.readd and node not in node_list:
2738
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2739

    
2740
    for existing_node_name in node_list:
2741
      existing_node = cfg.GetNodeInfo(existing_node_name)
2742

    
2743
      if self.op.readd and node == existing_node_name:
2744
        if (existing_node.primary_ip != primary_ip or
2745
            existing_node.secondary_ip != secondary_ip):
2746
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2747
                                     " address configuration as before")
2748
        continue
2749

    
2750
      if (existing_node.primary_ip == primary_ip or
2751
          existing_node.secondary_ip == primary_ip or
2752
          existing_node.primary_ip == secondary_ip or
2753
          existing_node.secondary_ip == secondary_ip):
2754
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2755
                                   " existing node %s" % existing_node.name)
2756

    
2757
    # check that the type of the node (single versus dual homed) is the
2758
    # same as for the master
2759
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2760
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2761
    newbie_singlehomed = secondary_ip == primary_ip
2762
    if master_singlehomed != newbie_singlehomed:
2763
      if master_singlehomed:
2764
        raise errors.OpPrereqError("The master has no private ip but the"
2765
                                   " new node has one")
2766
      else:
2767
        raise errors.OpPrereqError("The master has a private ip but the"
2768
                                   " new node doesn't have one")
2769

    
2770
    # checks reachability
2771
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2772
      raise errors.OpPrereqError("Node not reachable by ping")
2773

    
2774
    if not newbie_singlehomed:
2775
      # check reachability from my secondary ip to newbie's secondary ip
2776
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2777
                           source=myself.secondary_ip):
2778
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2779
                                   " based ping to noded port")
2780

    
2781
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2782
    if self.op.readd:
2783
      exceptions = [node]
2784
    else:
2785
      exceptions = []
2786
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2787
    # the new node will increase mc_max with one, so:
2788
    mc_max = min(mc_max + 1, cp_size)
2789
    self.master_candidate = mc_now < mc_max
2790

    
2791
    if self.op.readd:
2792
      self.new_node = self.cfg.GetNodeInfo(node)
2793
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2794
    else:
2795
      self.new_node = objects.Node(name=node,
2796
                                   primary_ip=primary_ip,
2797
                                   secondary_ip=secondary_ip,
2798
                                   master_candidate=self.master_candidate,
2799
                                   offline=False, drained=False)
2800

    
2801
  def Exec(self, feedback_fn):
2802
    """Adds the new node to the cluster.
2803

2804
    """
2805
    new_node = self.new_node
2806
    node = new_node.name
2807

    
2808
    # for re-adds, reset the offline/drained/master-candidate flags;
2809
    # we need to reset here, otherwise offline would prevent RPC calls
2810
    # later in the procedure; this also means that if the re-add
2811
    # fails, we are left with a non-offlined, broken node
2812
    if self.op.readd:
2813
      new_node.drained = new_node.offline = False
2814
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2815
      # if we demote the node, we do cleanup later in the procedure
2816
      new_node.master_candidate = self.master_candidate
2817

    
2818
    # notify the user about any possible mc promotion
2819
    if new_node.master_candidate:
2820
      self.LogInfo("Node will be a master candidate")
2821

    
2822
    # check connectivity
2823
    result = self.rpc.call_version([node])[node]
2824
    result.Raise("Can't get version information from node %s" % node)
2825
    if constants.PROTOCOL_VERSION == result.payload:
2826
      logging.info("Communication to node %s fine, sw version %s match",
2827
                   node, result.payload)
2828
    else:
2829
      raise errors.OpExecError("Version mismatch master version %s,"
2830
                               " node version %s" %
2831
                               (constants.PROTOCOL_VERSION, result.payload))
2832

    
2833
    # setup ssh on node
2834
    logging.info("Copy ssh key to node %s", node)
2835
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2836
    keyarray = []
2837
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2838
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2839
                priv_key, pub_key]
2840

    
2841
    for i in keyfiles:
2842
      keyarray.append(utils.ReadFile(i))
2843

    
2844
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2845
                                    keyarray[2],
2846
                                    keyarray[3], keyarray[4], keyarray[5])
2847
    result.Raise("Cannot transfer ssh keys to the new node")
2848

    
2849
    # Add node to our /etc/hosts, and add key to known_hosts
2850
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2851
      utils.AddHostToEtcHosts(new_node.name)
2852

    
2853
    if new_node.secondary_ip != new_node.primary_ip:
2854
      result = self.rpc.call_node_has_ip_address(new_node.name,
2855
                                                 new_node.secondary_ip)
2856
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2857
                   prereq=True)
2858
      if not result.payload:
2859
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2860
                                 " you gave (%s). Please fix and re-run this"
2861
                                 " command." % new_node.secondary_ip)
2862

    
2863
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed %s -> %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node)


class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    _CheckBooleanOpField(self.op, 'master_candidate')
    _CheckBooleanOpField(self.op, 'offline')
    _CheckBooleanOpField(self.op, 'drained')
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
    if all_mods.count(None) == 3:
      raise errors.OpPrereqError("Please pass at least one modification")
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time")

  def ExpandNames(self):
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via masterfailover")

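    # refuse (unless the operation is forced) to demote, offline or drain a
    # master candidate if that would leave fewer candidates than the
    # configured candidate pool size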
    if ((self.op.master_candidate == False or self.op.offline == True or
         self.op.drained == True) and node.master_candidate):
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
      if num_candidates <= cp_size:
        msg = ("Not enough master candidates (desired"
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
        if self.op.force:
          self.LogWarning(msg)
        else:
          raise errors.OpPrereqError(msg)

    if (self.op.master_candidate == True and
        ((node.offline and not self.op.offline == False) or
         (node.drained and not self.op.drained == False))):
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
                                 " to master_candidate" % node.name)

    return

  def Exec(self, feedback_fn):
    """Modifies a node.

    """
    node = self.node

    result = []
    changed_mc = False

    if self.op.offline is not None:
      node.offline = self.op.offline
      result.append(("offline", str(self.op.offline)))
      if self.op.offline == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to offline"))
        if node.drained:
          node.drained = False
          result.append(("drained", "clear drained status due to offline"))

    if self.op.master_candidate is not None:
      node.master_candidate = self.op.master_candidate
      changed_mc = True
      result.append(("master_candidate", str(self.op.master_candidate)))
      if self.op.master_candidate == False:
        rrc = self.rpc.call_node_demote_from_mc(node.name)
        msg = rrc.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s" % msg)

    if self.op.drained is not None:
      node.drained = self.op.drained
      result.append(("drained", str(self.op.drained)))
      if self.op.drained == True:
        if node.master_candidate:
          node.master_candidate = False
          changed_mc = True
          result.append(("master_candidate", "auto-demotion due to drain"))
          rrc = self.rpc.call_node_demote_from_mc(node.name)
          msg = rrc.fail_msg
          if msg:
            self.LogWarning("Node failed to demote itself: %s" % msg)
        if node.offline:
          node.offline = False
          result.append(("offline", "clear offline status due to drain"))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node)
    # this will trigger job queue propagation or cleanup
    if changed_mc:
      self.context.ReaddNode(node)

    return result


class LUPowercycleNode(NoHooksLU):
  """Powercycles a node.

  """
  _OP_REQP = ["node_name", "force"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
    self.op.node_name = node_name
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set")

  def ExpandNames(self):
    """Locking for PowercycleNode.

    This is a last-resort option and shouldn't block on other
    jobs. Therefore, we grab no locks.

    """
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This LU has no prereqs.

    """
    pass

  def Exec(self, feedback_fn):
    """Reboots a node.

    """
    result = self.rpc.call_node_powercycle(self.op.node_name,
                                           self.cfg.GetHypervisorType())
    result.Raise("Failed to schedule the reboot")
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """No prerequisites needed for this LU.

    """
    pass

  def Exec(self, feedback_fn):
    """Return cluster config.

    """
    cluster = self.cfg.GetClusterInfo()
    result = {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": cluster.enabled_hypervisors[0],
      "enabled_hypervisors": cluster.enabled_hypervisors,
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
                        for hypervisor_name in cluster.enabled_hypervisors]),
      "beparams": cluster.beparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "file_storage_dir": cluster.file_storage_dir,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "tags": list(cluster.GetTags()),
      }

    return result


class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_REQP = []
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause")

  def ExpandNames(self):
    self.needed_locks = {}

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def CheckPrereq(self):
    """No prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Dump a representation of the cluster config to the standard output.

    """
    values = []
    for field in self.op.output_fields:
      if field == "cluster_name":
        entry = self.cfg.GetClusterName()
      elif field == "master_node":
        entry = self.cfg.GetMasterNode()
      elif field == "drain_flag":
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
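      # note: watcher_pause returns immediately, bypassing the values list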
      elif field == "watcher_pause":
3159
        return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3160
      else:
3161
        raise errors.ParameterError(field)
3162
      values.append(entry)
3163
    return values
3164

    
3165

    
3166
class LUActivateInstanceDisks(NoHooksLU):
3167
  """Bring up an instance's disks.
3168

3169
  """
3170
  _OP_REQP = ["instance_name"]
3171
  REQ_BGL = False
3172

    
3173
  def ExpandNames(self):
3174
    self._ExpandAndLockInstance()
3175
    self.needed_locks[locking.LEVEL_NODE] = []
3176
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3177

    
3178
  def DeclareLocks(self, level):
3179
    if level == locking.LEVEL_NODE:
3180
      self._LockInstancesNodes()
3181

    
3182
  def CheckPrereq(self):
3183
    """Check prerequisites.
3184

3185
    This checks that the instance is in the cluster.
3186

3187
    """
3188
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3189
    assert self.instance is not None, \
3190
      "Cannot retrieve locked instance %s" % self.op.instance_name
3191
    _CheckNodeOnline(self, self.instance.primary_node)
3192
    if not hasattr(self.op, "ignore_size"):
3193
      self.op.ignore_size = False
3194

    
3195
  def Exec(self, feedback_fn):
3196
    """Activate the disks.
3197

3198
    """
3199
    disks_ok, disks_info = \
3200
              _AssembleInstanceDisks(self, self.instance,
3201
                                     ignore_size=self.op.ignore_size)
3202
    if not disks_ok:
3203
      raise errors.OpExecError("Cannot activate block devices")
3204

    
3205
    return disks_info
3206

    
3207

    
3208
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3209
                           ignore_size=False):
3210
  """Prepare the block devices for an instance.
3211

3212
  This sets up the block devices on all nodes.
3213

3214
  @type lu: L{LogicalUnit}
3215
  @param lu: the logical unit on whose behalf we execute
3216
  @type instance: L{objects.Instance}
3217
  @param instance: the instance for whose disks we assemble
3218
  @type ignore_secondaries: boolean
3219
  @param ignore_secondaries: if true, errors on secondary nodes
3220
      won't result in an error return from the function
3221
  @type ignore_size: boolean
3222
  @param ignore_size: if true, the current known size of the disk
3223
      will not be used during the disk activation, useful for cases
3224
      when the size is wrong
3225
  @return: False if the operation failed, otherwise a list of
3226
      (host, instance_visible_name, node_visible_name)
3227
      with the mapping from node devices to instance devices
3228

3229
  """
3230
  device_info = []
3231
  disks_ok = True
3232
  iname = instance.name
3233
  # With the two passes mechanism we try to reduce the window of
3234
  # opportunity for the race condition of switching DRBD to primary
3235
  # before handshaking occured, but we do not eliminate it
3236

    
3237
  # The proper fix would be to wait (with some limits) until the
3238
  # connection has been made and drbd transitions from WFConnection
3239
  # into any other network-connected state (Connected, SyncTarget,
3240
  # SyncSource, etc.)
3241

    
3242
  # 1st pass, assemble on all nodes in secondary mode
3243
  for inst_disk in instance.disks:
3244
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3245
      if ignore_size:
3246
        node_disk = node_disk.Copy()
3247
        node_disk.UnsetSize()
3248
      lu.cfg.SetDiskID(node_disk, node)
3249
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3250
      msg = result.fail_msg
3251
      if msg:
3252
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3253
                           " (is_primary=False, pass=1): %s",
3254
                           inst_disk.iv_name, node, msg)
3255
        if not ignore_secondaries:
3256
          disks_ok = False
3257

    
3258
  # FIXME: race condition on drbd migration to primary
3259

    
3260
  # 2nd pass, do only the primary node
3261
  for inst_disk in instance.disks:
3262
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3263
      if node != instance.primary_node:
3264
        continue
3265
      if ignore_size:
3266
        node_disk = node_disk.Copy()
3267
        node_disk.UnsetSize()
3268
      lu.cfg.SetDiskID(node_disk, node)
3269
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3270
      msg = result.fail_msg
3271
      if msg:
3272
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3273
                           " (is_primary=True, pass=2): %s",
3274
                           inst_disk.iv_name, node, msg)
3275
        disks_ok = False
3276
    device_info.append((instance.primary_node, inst_disk.iv_name,
3277
                        result.payload))
3278

    
3279
  # leave the disks configured for the primary node
3280
  # this is a workaround that would be fixed better by
3281
  # improving the logical/physical id handling
3282
  for disk in instance.disks:
3283
    lu.cfg.SetDiskID(disk, instance.primary_node)
3284

    
3285
  return disks_ok, device_info
3286

    
3287

    
3288
def _StartInstanceDisks(lu, instance, force):
3289
  """Start the disks of an instance.
3290

3291
  """
3292
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3293
                                           ignore_secondaries=force)
3294
  if not disks_ok:
3295
    _ShutdownInstanceDisks(lu, instance)
3296
    if force is not None and not force:
3297
      lu.proc.LogWarning("", hint="If the message above refers to a"
3298
                         " secondary node,"
3299
                         " you can retry the operation using '--force'.")
3300
    raise errors.OpExecError("Disk consistency error")
3301

    
3302

    
3303
class LUDeactivateInstanceDisks(NoHooksLU):
3304
  """Shutdown an instance's disks.
3305

3306
  """
3307
  _OP_REQP = ["instance_name"]
3308
  REQ_BGL = False
3309

    
3310
  def ExpandNames(self):
3311
    self._ExpandAndLockInstance()
3312
    self.needed_locks[locking.LEVEL_NODE] = []
3313
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3314

    
3315
  def DeclareLocks(self, level):
3316
    if level == locking.LEVEL_NODE:
3317
      self._LockInstancesNodes()
3318

    
3319
  def CheckPrereq(self):
3320
    """Check prerequisites.
3321

3322
    This checks that the instance is in the cluster.
3323

3324
    """
3325
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3326
    assert self.instance is not None, \
3327
      "Cannot retrieve locked instance %s" % self.op.instance_name
3328

    
3329
  def Exec(self, feedback_fn):
3330
    """Deactivate the disks
3331

3332
    """
3333
    instance = self.instance
3334
    _SafeShutdownInstanceDisks(self, instance)
3335

    
3336

    
3337
def _SafeShutdownInstanceDisks(lu, instance):
3338
  """Shutdown block devices of an instance.
3339

3340
  This function checks if an instance is running, before calling
3341
  _ShutdownInstanceDisks.
3342

3343
  """
3344
  pnode = instance.primary_node
3345
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3346
  ins_l.Raise("Can't contact node %s" % pnode)
3347

    
3348
  if instance.name in ins_l.payload:
3349
    raise errors.OpExecError("Instance is running, can't shutdown"
3350
                             " block devices.")
3351

    
3352
  _ShutdownInstanceDisks(lu, instance)
3353

    
3354

    
3355
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3356
  """Shutdown block devices of an instance.
3357

3358
  This does the shutdown on all nodes of the instance.
3359

3360
  If the ignore_primary is false, errors on the primary node are
3361
  ignored.
3362

3363
  """
3364
  all_result = True
3365
  for disk in instance.disks:
3366
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3367
      lu.cfg.SetDiskID(top_disk, node)
3368
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3369
      msg = result.fail_msg
3370
      if msg:
3371
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3372
                      disk.iv_name, node, msg)
3373
        if not ignore_primary or node != instance.primary_node:
3374
          all_result = False
3375
  return all_result
3376

    
3377

    
3378
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3379
  """Checks if a node has enough free memory.
3380

3381
  This function check if a given node has the needed amount of free
3382
  memory. In case the node has less memory or we cannot get the
3383
  information from the node, this function raise an OpPrereqError
3384
  exception.
3385

3386
  @type lu: C{LogicalUnit}
3387
  @param lu: a logical unit from which we get configuration data
3388
  @type node: C{str}
3389
  @param node: the node to check
3390
  @type reason: C{str}
3391
  @param reason: string to use in the error message
3392
  @type requested: C{int}
3393
  @param requested: the amount of memory in MiB to check for
3394
  @type hypervisor_name: C{str}
3395
  @param hypervisor_name: the hypervisor to ask for memory stats
3396
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3397
      we cannot check the node
3398

3399
  """
3400
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3401
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3402
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3403
  if not isinstance(free_mem, int):
3404
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3405
                               " was '%s'" % (node, free_mem))
3406
  if requested > free_mem:
3407
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3408
                               " needed %s MiB, available %s MiB" %
3409
                               (node, reason, requested, free_mem))
3410

    
3411

    
3412
class LUStartupInstance(LogicalUnit):
3413
  """Starts an instance.
3414

3415
  """
3416
  HPATH = "instance-start"
3417
  HTYPE = constants.HTYPE_INSTANCE
3418
  _OP_REQP = ["instance_name", "force"]
3419
  REQ_BGL = False
3420

    
3421
  def ExpandNames(self):
3422
    self._ExpandAndLockInstance()
3423

    
3424
  def BuildHooksEnv(self):
3425
    """Build hooks env.
3426

3427
    This runs on master, primary and secondary nodes of the instance.
3428

3429
    """
3430
    env = {
3431
      "FORCE": self.op.force,
3432
      }
3433
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3434
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3435
    return env, nl, nl
3436

    
3437
  def CheckPrereq(self):
3438
    """Check prerequisites.
3439

3440
    This checks that the instance is in the cluster.
3441

3442
    """
3443
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3444
    assert self.instance is not None, \
3445
      "Cannot retrieve locked instance %s" % self.op.instance_name
3446

    
3447
    # extra beparams
3448
    self.beparams = getattr(self.op, "beparams", {})
3449
    if self.beparams:
3450
      if not isinstance(self.beparams, dict):
3451
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3452
                                   " dict" % (type(self.beparams), ))
3453
      # fill the beparams dict
3454
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3455
      self.op.beparams = self.beparams
3456

    
3457
    # extra hvparams
3458
    self.hvparams = getattr(self.op, "hvparams", {})
3459
    if self.hvparams:
3460
      if not isinstance(self.hvparams, dict):
3461
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3462
                                   " dict" % (type(self.hvparams), ))
3463

    
3464
      # check hypervisor parameter syntax (locally)
3465
      cluster = self.cfg.GetClusterInfo()
3466
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3467
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3468
                                    instance.hvparams)
3469
      filled_hvp.update(self.hvparams)
3470
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3471
      hv_type.CheckParameterSyntax(filled_hvp)
3472
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3473
      self.op.hvparams = self.hvparams
3474

    
3475
    _CheckNodeOnline(self, instance.primary_node)
3476

    
3477
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3478
    # check bridges existence
3479
    _CheckInstanceBridgesExist(self, instance)
3480

    
3481
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3482
                                              instance.name,
3483
                                              instance.hypervisor)
3484
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3485
                      prereq=True)
3486
    if not remote_info.payload: # not running already
3487
      _CheckNodeFreeMemory(self, instance.primary_node,
3488
                           "starting instance %s" % instance.name,
3489
                           bep[constants.BE_MEMORY], instance.hypervisor)
3490

    
3491
  def Exec(self, feedback_fn):
3492
    """Start the instance.
3493

3494
    """
3495
    instance = self.instance
3496
    force = self.op.force
3497

    
3498
    self.cfg.MarkInstanceUp(instance.name)
3499

    
3500
    node_current = instance.primary_node
3501

    
3502
    _StartInstanceDisks(self, instance, force)
3503

    
3504
    result = self.rpc.call_instance_start(node_current, instance,
3505
                                          self.hvparams, self.beparams)
3506
    msg = result.fail_msg
3507
    if msg:
3508
      _ShutdownInstanceDisks(self, instance)
3509
      raise errors.OpExecError("Could not start instance: %s" % msg)
3510

    
3511

    
3512
class LURebootInstance(LogicalUnit):
3513
  """Reboot an instance.
3514

3515
  """
3516
  HPATH = "instance-reboot"
3517
  HTYPE = constants.HTYPE_INSTANCE
3518
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3519
  REQ_BGL = False
3520

    
3521
  def ExpandNames(self):
3522
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3523
                                   constants.INSTANCE_REBOOT_HARD,
3524
                                   constants.INSTANCE_REBOOT_FULL]:
3525
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3526
                                  (constants.INSTANCE_REBOOT_SOFT,
3527
                                   constants.INSTANCE_REBOOT_HARD,
3528
                                   constants.INSTANCE_REBOOT_FULL))
3529
    self._ExpandAndLockInstance()
3530

    
3531
  def BuildHooksEnv(self):
3532
    """Build hooks env.
3533

3534
    This runs on master, primary and secondary nodes of the instance.
3535

3536
    """
3537
    env = {
3538
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3539
      "REBOOT_TYPE": self.op.reboot_type,
3540
      }
3541
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3542
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3543
    return env, nl, nl
3544

    
3545
  def CheckPrereq(self):
3546
    """Check prerequisites.
3547

3548
    This checks that the instance is in the cluster.
3549

3550
    """
3551
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3552
    assert self.instance is not None, \
3553
      "Cannot retrieve locked instance %s" % self.op.instance_name
3554

    
3555
    _CheckNodeOnline(self, instance.primary_node)
3556

    
3557
    # check bridges existence
3558
    _CheckInstanceBridgesExist(self, instance)
3559

    
3560
  def Exec(self, feedback_fn):
3561
    """Reboot the instance.
3562

3563
    """
3564
    instance = self.instance
3565
    ignore_secondaries = self.op.ignore_secondaries
3566
    reboot_type = self.op.reboot_type
3567

    
3568
    node_current = instance.primary_node
3569

    
3570
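    # soft/hard reboots are delegated to the hypervisor on the node; a full
    # reboot is emulated here via shutdown, disk restart and start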
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                       constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type)
      result.Raise("Could not reboot instance")
    else:
      result = self.rpc.call_instance_shutdown(node_current, instance)
      result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, instance)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    """
    instance = self.instance
    node_current = instance.primary_node
    self.cfg.MarkInstanceDown(instance.name)
    result = self.rpc.call_instance_shutdown(node_current, instance)
    msg = result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    self.op.os_type = getattr(self.op, "os_type", None)
    if self.op.os_type is not None:
      # OS verification
      pnode = self.cfg.GetNodeInfo(
        self.cfg.ExpandNodeName(instance.primary_node))
      if pnode is None:
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
                                   self.op.pnode)
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
                   (self.op.os_type, pnode.name), prereq=True)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      self.cfg.Update(inst)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks"]
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    """
    if not isinstance(self.op.disks, list):
      raise errors.OpPrereqError("Invalid disks parameter")
    for item in self.op.disks:
      if (not isinstance(item, int) or
          item < 0):
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
                                   str(item))

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name)
    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))

    if not self.op.disks:
      self.op.disks = range(len(instance.disks))
    else:
      for idx in self.op.disks:
        if idx >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    """
    to_skip = []
    for idx, disk in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
        continue

    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "new_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    env["INSTANCE_NEW_NAME"] = self.op.new_name
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.op.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.op.instance_name)
    _CheckNodeOnline(self, instance.primary_node)

    if instance.admin_up:
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
                                 self.op.instance_name)
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node,
                      prereq=True)
    if remote_info.payload:
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
                                 (self.op.instance_name,
                                  instance.primary_node))
    self.instance = instance

    # new name verification
    name_info = utils.HostInfo(self.op.new_name)

    self.op.new_name = new_name = name_info.name
    instance_list = self.cfg.GetInstanceList()
    if new_name in instance_list:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 new_name)

    if not getattr(self.op, "ignore_ip", False):
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (name_info.ip, new_name))


  def Exec(self, feedback_fn):
    """Rename the instance.

    """
    inst = self.instance
    old_name = inst.name

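    # for file-based disks the storage directory is derived from the first
    # disk's path; remember it so it can be renamed on the node further down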
    if inst.disk_template == constants.DT_FILE:
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                     old_file_storage_dir,
                                                     new_file_storage_dir)
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
                   " (but the instance has been renamed in Ganeti)" %
                   (inst.primary_node, old_file_storage_dir,
                    new_file_storage_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
                                                 old_name)
      msg = result.fail_msg
      if msg:
        msg = ("Could not run OS rename script for instance %s on node %s"
               " (but the instance has been renamed in Ganeti): %s" %
               (inst.name, inst.primary_node, msg))
        self.proc.LogWarning(msg)
    finally:
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_failures"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = _BuildInstanceHookEnvByObject(self, self.instance)
    nl = [self.cfg.GetMasterNode()]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    """
    instance = self.instance
    logging.info("Shutting down instance %s on node %s",
                 instance.name, instance.primary_node)

    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_failures:
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, instance.primary_node, msg))

    logging.info("Removing block devices for instance %s", instance.name)

    if not _RemoveDisks(self, instance):
      if self.op.ignore_failures:
        feedback_fn("Warning: can't remove instance's disks")
      else:
        raise errors.OpExecError("Can't remove instance's disks")

    logging.info("Removing instance %s out of cluster config", instance.name)

    self.cfg.RemoveInstance(instance.name)
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "serial_no", "hypervisor", "hvparams",
                                    "ctime", "mtime",
                                    ] +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")


  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

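    # live data (and thus locking) is only needed when dynamic fields were
    # requested and the caller asked for locking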
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    """
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      i_hv = cluster.FillHV(instance)
      i_be = cluster.FillBE(instance)
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                 nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        st_match = self._FIELDS_STATIC.Matches(field)
        if field == "name":
          val = instance.name
        elif field == "os":
          val = instance.os
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "sda_size" or field == "sdb_size":
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "serial_no":
          val = instance.serial_no
        elif field == "ctime":
          val = instance.ctime
        elif field == "mtime":
          val = instance.mtime
        elif field == "network_port":
          val = instance.network_port
        elif field == "hypervisor":
          val = instance.hypervisor
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output


class LUFailoverInstance(LogicalUnit):
  """Failover an instance.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "ignore_consistency"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
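    # only instances with a network-mirrored disk template (e.g. DRBD) can be
    # failed over, since the data must already exist on the secondary node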
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
4377
    # distribute new instance config to the other nodes
4378
    self.cfg.Update(instance)
4379

    
4380
    # Only start the instance if it's marked as up
4381
    if instance.admin_up:
4382
      feedback_fn("* activating the instance's disks on target node")
4383
      logging.info("Starting instance %s on node %s",
4384
                   instance.name, target_node)
4385

    
4386
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4387
                                               ignore_secondaries=True)
4388
      if not disks_ok:
4389
        _ShutdownInstanceDisks(self, instance)
4390
        raise errors.OpExecError("Can't activate the instance's disks")
4391

    
4392
      feedback_fn("* starting the instance on the target node")
4393
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4394
      msg = result.fail_msg
4395
      if msg:
4396
        _ShutdownInstanceDisks(self, instance)
4397
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4398
                                 (instance.name, target_node, msg))
4399

    
4400

    
4401
class LUMigrateInstance(LogicalUnit):
4402
  """Migrate an instance.
4403

4404
  This is migration without shutting down, compared to the failover,
4405
  which is done with shutdown.
4406

4407
  """
4408
  HPATH = "instance-migrate"
4409
  HTYPE = constants.HTYPE_INSTANCE
4410
  _OP_REQP = ["instance_name", "live", "cleanup"]
4411

    
4412
  REQ_BGL = False
4413

    
4414
  def ExpandNames(self):
4415
    self._ExpandAndLockInstance()
4416

    
4417
    self.needed_locks[locking.LEVEL_NODE] = []
4418
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4419

    
4420
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4421
                                       self.op.live, self.op.cleanup)
4422
    self.tasklets = [self._migrater]
4423

    
4424
  def DeclareLocks(self, level):
4425
    if level == locking.LEVEL_NODE:
4426
      self._LockInstancesNodes()
4427

    
4428
  def BuildHooksEnv(self):
4429
    """Build hooks env.
4430

4431
    This runs on master, primary and secondary nodes of the instance.
4432

4433
    """
4434
    instance = self._migrater.instance
4435
    env = _BuildInstanceHookEnvByObject(self, instance)
4436
    env["MIGRATE_LIVE"] = self.op.live
4437
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4438
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4439
    return env, nl, nl
4440

    
4441

    
4442
class LUMoveInstance(LogicalUnit):
4443
  """Move an instance by data-copying.
4444

4445
  """
4446
  HPATH = "instance-move"
4447
  HTYPE = constants.HTYPE_INSTANCE
4448
  _OP_REQP = ["instance_name", "target_node"]
4449
  REQ_BGL = False
4450

    
4451
  def ExpandNames(self):
4452
    self._ExpandAndLockInstance()
4453
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
4454
    if target_node is None:
4455
      raise errors.OpPrereqError("Node '%s' not known" %
4456
                                  self.op.target_node)
4457
    self.op.target_node = target_node
4458
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4459
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4460

    
4461
  def DeclareLocks(self, level):
4462
    if level == locking.LEVEL_NODE:
4463
      self._LockInstancesNodes(primary_only=True)
4464

    
4465
  def BuildHooksEnv(self):
4466
    """Build hooks env.
4467

4468
    This runs on master, primary and secondary nodes of the instance.
4469

4470
    """
4471
    env = {
4472
      "TARGET_NODE": self.op.target_node,
4473
      }
4474
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4475
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4476
                                       self.op.target_node]
4477
    return env, nl, nl
4478

    
4479
  def CheckPrereq(self):
4480
    """Check prerequisites.
4481

4482
    This checks that the instance is in the cluster.
4483

4484
    """
4485
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4486
    assert self.instance is not None, \
4487
      "Cannot retrieve locked instance %s" % self.op.instance_name
4488

    
4489
    node = self.cfg.GetNodeInfo(self.op.target_node)
4490
    assert node is not None, \
4491
      "Cannot retrieve locked node %s" % self.op.target_node
4492

    
4493
    self.target_node = target_node = node.name
4494

    
4495
    if target_node == instance.primary_node:
4496
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4497
                                 (instance.name, target_node))
4498

    
4499
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4500

    
4501
    for idx, dsk in enumerate(instance.disks):
4502
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
4503
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
4504
                                   " cannot copy")
4505

    
4506
    _CheckNodeOnline(self, target_node)
4507
    _CheckNodeNotDrained(self, target_node)
4508

    
4509
    if instance.admin_up:
4510
      # check memory requirements on the secondary node
4511
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4512
                           instance.name, bep[constants.BE_MEMORY],
4513
                           instance.hypervisor)
4514
    else:
4515
      self.LogInfo("Not checking memory on the secondary node as"
4516
                   " instance will not be started")
4517

    
4518
    # check bridge existance
4519
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4520

    
4521
  def Exec(self, feedback_fn):
4522
    """Move an instance.
4523

4524
    The move is done by shutting it down on its present node, copying
4525
    the data over (slow) and starting it on the new node.
4526

4527
    """
4528
    instance = self.instance
4529

    
4530
    source_node = instance.primary_node
4531
    target_node = self.target_node
4532

    
4533
    self.LogInfo("Shutting down instance %s on source node %s",
4534
                 instance.name, source_node)
4535

    
4536
    result = self.rpc.call_instance_shutdown(source_node, instance)
4537
    msg = result.fail_msg
4538
    if msg:
4539
      if self.op.ignore_consistency:
4540
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4541
                             " Proceeding anyway. Please make sure node"
4542
                             " %s is down. Error details: %s",
4543
                             instance.name, source_node, source_node, msg)
4544
      else:
4545
        raise errors.OpExecError("Could not shutdown instance %s on"
4546
                                 " node %s: %s" %
4547
                                 (instance.name, source_node, msg))
4548

    
4549
    # create the target disks
4550
    try:
4551
      _CreateDisks(self, instance, target_node=target_node)
4552
    except errors.OpExecError:
4553
      self.LogWarning("Device creation failed, reverting...")
4554
      try:
4555
        _RemoveDisks(self, instance, target_node=target_node)
4556
      finally:
4557
        self.cfg.ReleaseDRBDMinors(instance.name)
4558
        raise
4559

    
4560
    cluster_name = self.cfg.GetClusterInfo().cluster_name
4561

    
4562
    errs = []
4563
    # activate, get path, copy the data over
4564
    for idx, disk in enumerate(instance.disks):
4565
      self.LogInfo("Copying data for disk %d", idx)
4566
      result = self.rpc.call_blockdev_assemble(target_node, disk,
4567
                                               instance.name, True)
4568
      if result.fail_msg:
4569
        self.LogWarning("Can't assemble newly created disk %d: %s",
4570
                        idx, result.fail_msg)
4571
        errs.append(result.fail_msg)
4572
        break
4573
      dev_path = result.payload
4574
      result = self.rpc.call_blockdev_export(source_node, disk,
4575
                                             target_node, dev_path,
4576
                                             cluster_name)
4577
      if result.fail_msg:
4578
        self.LogWarning("Can't copy data over for disk %d: %s",
4579
                        idx, result.fail_msg)
4580
        errs.append(result.fail_msg)
4581
        break
4582

    
4583
    if errs:
4584
      self.LogWarning("Some disks failed to copy, aborting")
4585
      try:
4586
        _RemoveDisks(self, instance, target_node=target_node)
4587
      finally:
4588
        self.cfg.ReleaseDRBDMinors(instance.name)
4589
        raise errors.OpExecError("Errors during disk copy: %s" %
4590
                                 (",".join(errs),))
4591

    
4592
    instance.primary_node = target_node
4593
    self.cfg.Update(instance)
4594

    
4595
    self.LogInfo("Removing the disks on the original node")
4596
    _RemoveDisks(self, instance, target_node=source_node)
4597

    
4598
    # Only start the instance if it's marked as up
4599
    if instance.admin_up:
4600
      self.LogInfo("Starting instance %s on node %s",
4601
                   instance.name, target_node)
4602

    
4603
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4604
                                           ignore_secondaries=True)
4605
      if not disks_ok:
4606
        _ShutdownInstanceDisks(self, instance)
4607
        raise errors.OpExecError("Can't activate the instance's disks")
4608

    
4609
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4610
      msg = result.fail_msg
4611
      if msg:
4612
        _ShutdownInstanceDisks(self, instance)
4613
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4614
                                 (instance.name, target_node, msg))
4615

    
4616

    
4617
class LUMigrateNode(LogicalUnit):
4618
  """Migrate all instances from a node.
4619

4620
  """
4621
  HPATH = "node-migrate"
4622
  HTYPE = constants.HTYPE_NODE
4623
  _OP_REQP = ["node_name", "live"]
4624
  REQ_BGL = False
4625

    
4626
  def ExpandNames(self):
4627
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
4628
    if self.op.node_name is None:
4629
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
4630

    
4631
    self.needed_locks = {
4632
      locking.LEVEL_NODE: [self.op.node_name],
4633
      }
4634

    
4635
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4636

    
4637
    # Create tasklets for migrating instances for all instances on this node
4638
    names = []
4639
    tasklets = []
4640

    
4641
    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
4642
      logging.debug("Migrating instance %s", inst.name)
4643
      names.append(inst.name)
4644

    
4645
      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
4646

    
4647
    self.tasklets = tasklets
4648

    
4649
    # Declare instance locks
4650
    self.needed_locks[locking.LEVEL_INSTANCE] = names
4651

    
4652
  def DeclareLocks(self, level):
4653
    if level == locking.LEVEL_NODE:
4654
      self._LockInstancesNodes()
4655

    
4656
  def BuildHooksEnv(self):
4657
    """Build hooks env.
4658

4659
    This runs on the master, the primary and all the secondaries.
4660

4661
    """
4662
    env = {
4663
      "NODE_NAME": self.op.node_name,
4664
      }
4665

    
4666
    nl = [self.cfg.GetMasterNode()]
4667

    
4668
    return (env, nl, nl)
4669

    
4670

    
4671
class TLMigrateInstance(Tasklet):
4672
  def __init__(self, lu, instance_name, live, cleanup):
4673
    """Initializes this class.
4674

4675
    """
4676
    Tasklet.__init__(self, lu)
4677

    
4678
    # Parameters
4679
    self.instance_name = instance_name
4680
    self.live = live
4681
    self.cleanup = cleanup
4682

    
4683
  def CheckPrereq(self):
4684
    """Check prerequisites.
4685

4686
    This checks that the instance is in the cluster.
4687

4688
    """
4689
    instance = self.cfg.GetInstanceInfo(
4690
      self.cfg.ExpandInstanceName(self.instance_name))
4691
    if instance is None:
4692
      raise errors.OpPrereqError("Instance '%s' not known" %
4693
                                 self.instance_name)
4694

    
4695
    if instance.disk_template != constants.DT_DRBD8:
4696
      raise errors.OpPrereqError("Instance's disk layout is not"
4697
                                 " drbd8, cannot migrate.")
4698

    
4699
    secondary_nodes = instance.secondary_nodes
4700
    if not secondary_nodes:
4701
      raise errors.ConfigurationError("No secondary node but using"
4702
                                      " drbd8 disk template")
4703

    
4704
    i_be = self.cfg.GetClusterInfo().FillBE(instance)
4705

    
4706
    target_node = secondary_nodes[0]
4707
    # check memory requirements on the secondary node
4708
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
4709
                         instance.name, i_be[constants.BE_MEMORY],
4710
                         instance.hypervisor)
4711

    
4712
    # check bridge existance
4713
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4714

    
4715
    if not self.cleanup:
4716
      _CheckNodeNotDrained(self, target_node)
4717
      result = self.rpc.call_instance_migratable(instance.primary_node,
4718
                                                 instance)
4719
      result.Raise("Can't migrate, please use failover", prereq=True)
4720

    
4721
    self.instance = instance
4722

    
4723
  def _WaitUntilSync(self):
4724
    """Poll with custom rpc for disk sync.
4725

4726
    This uses our own step-based rpc call.
4727

4728
    """
4729
    self.feedback_fn("* wait until resync is done")
4730
    all_done = False
4731
    while not all_done:
4732
      all_done = True
4733
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
4734
                                            self.nodes_ip,
4735
                                            self.instance.disks)
4736
      min_percent = 100
4737
      for node, nres in result.items():
4738
        nres.Raise("Cannot resync disks on node %s" % node)
4739
        node_done, node_percent = nres.payload
4740
        all_done = all_done and node_done
4741
        if node_percent is not None:
4742
          min_percent = min(min_percent, node_percent)
4743
      if not all_done:
4744
        if min_percent < 100:
4745
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
4746
        time.sleep(2)
4747

    
4748
  def _EnsureSecondary(self, node):
4749
    """Demote a node to secondary.
4750

4751
    """
4752
    self.feedback_fn("* switching node %s to secondary mode" % node)
4753

    
4754
    for dev in self.instance.disks:
4755
      self.cfg.SetDiskID(dev, node)
4756

    
4757
    result = self.rpc.call_blockdev_close(node, self.instance.name,
4758
                                          self.instance.disks)
4759
    result.Raise("Cannot change disk to secondary on node %s" % node)
4760

    
4761
  def _GoStandalone(self):
4762
    """Disconnect from the network.
4763

4764
    """
4765
    self.feedback_fn("* changing into standalone mode")
4766
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
4767
                                               self.instance.disks)
4768
    for node, nres in result.items():
4769
      nres.Raise("Cannot disconnect disks node %s" % node)
4770

    
4771
  def _GoReconnect(self, multimaster):
4772
    """Reconnect to the network.
4773

4774
    """
4775
    if multimaster:
4776
      msg = "dual-master"
4777
    else:
4778
      msg = "single-master"
4779
    self.feedback_fn("* changing disks into %s mode" % msg)
4780
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
4781
                                           self.instance.disks,
4782
                                           self.instance.name, multimaster)
4783
    for node, nres in result.items():
4784
      nres.Raise("Cannot change disks config on node %s" % node)
4785

    
4786
  def _ExecCleanup(self):
4787
    """Try to cleanup after a failed migration.
4788

4789
    The cleanup is done by:
4790
      - check that the instance is running only on one node
4791
        (and update the config if needed)
4792
      - change disks on its secondary node to secondary
4793
      - wait until disks are fully synchronized
4794
      - disconnect from the network
4795
      - change disks into single-master mode
4796
      - wait again until disks are fully synchronized
4797

4798
    """
4799
    instance = self.instance
4800
    target_node = self.target_node
4801
    source_node = self.source_node
4802

    
4803
    # check running on only one node
4804
    self.feedback_fn("* checking where the instance actually runs"
4805
                     " (if this hangs, the hypervisor might be in"
4806
                     " a bad state)")
4807
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
4808
    for node, result in ins_l.items():
4809
      result.Raise("Can't contact node %s" % node)
4810

    
4811
    runningon_source = instance.name in ins_l[source_node].payload
4812
    runningon_target = instance.name in ins_l[target_node].payload
4813

    
4814
    if runningon_source and runningon_target:
4815
      raise errors.OpExecError("Instance seems to be running on two nodes,"
4816
                               " or the hypervisor is confused. You will have"
4817
                               " to ensure manually that it runs only on one"
4818
                               " and restart this operation.")
4819

    
4820
    if not (runningon_source or runningon_target):
4821
      raise errors.OpExecError("Instance does not seem to be running at all."
4822
                               " In this case, it's safer to repair by"
4823
                               " running 'gnt-instance stop' to ensure disk"
4824
                               " shutdown, and then restarting it.")
4825

    
4826
    if runningon_target:
4827
      # the migration has actually succeeded, we need to update the config
4828
      self.feedback_fn("* instance running on secondary node (%s),"
4829
                       " updating config" % target_node)
4830
      instance.primary_node = target_node
4831
      self.cfg.Update(instance)
4832
      demoted_node = source_node
4833
    else:
4834
      self.feedback_fn("* instance confirmed to be running on its"
4835
                       " primary node (%s)" % source_node)
4836
      demoted_node = target_node
4837

    
4838
    self._EnsureSecondary(demoted_node)
4839
    try:
4840
      self._WaitUntilSync()
4841
    except errors.OpExecError:
4842
      # we ignore here errors, since if the device is standalone, it
4843
      # won't be able to sync
4844
      pass
4845
    self._GoStandalone()
4846
    self._GoReconnect(False)
4847
    self._WaitUntilSync()
4848

    
4849
    self.feedback_fn("* done")
4850

    
4851
  def _RevertDiskStatus(self):
4852
    """Try to revert the disk status after a failed migration.
4853

4854
    """
4855
    target_node = self.target_node
4856
    try:
4857
      self._EnsureSecondary(target_node)
4858
      self._GoStandalone()
4859
      self._GoReconnect(False)
4860
      self._WaitUntilSync()
4861
    except errors.OpExecError, err:
4862
      self.lu.LogWarning("Migration failed and I can't reconnect the"
4863
                         " drives: error '%s'\n"
4864
                         "Please look and recover the instance status" %
4865
                         str(err))
4866

    
4867
  def _AbortMigration(self):
4868
    """Call the hypervisor code to abort a started migration.
4869

4870
    """
4871
    instance = self.instance
4872
    target_node = self.target_node
4873
    migration_info = self.migration_info
4874

    
4875
    abort_result = self.rpc.call_finalize_migration(target_node,
4876
                                                    instance,
4877
                                                    migration_info,
4878
                                                    False)
4879
    abort_msg = abort_result.fail_msg
4880
    if abort_msg:
4881
      logging.error("Aborting migration failed on target node %s: %s" %
4882
                    (target_node, abort_msg))
4883
      # Don't raise an exception here, as we stil have to try to revert the
4884
      # disk status, even if this step failed.
4885

    
4886
  def _ExecMigration(self):
4887
    """Migrate an instance.
4888

4889
    The migrate is done by:
4890
      - change the disks into dual-master mode
4891
      - wait until disks are fully synchronized again
4892
      - migrate the instance
4893
      - change disks on the new secondary node (the old primary) to secondary
4894
      - wait until disks are fully synchronized
4895
      - change disks into single-master mode
4896

4897
    """
4898
    instance = self.instance
4899
    target_node = self.target_node
4900
    source_node = self.source_node
4901

    
4902
    self.feedback_fn("* checking disk consistency between source and target")
4903
    for dev in instance.disks:
4904
      if not _CheckDiskConsistency(self, dev, target_node, False):
4905
        raise errors.OpExecError("Disk %s is degraded or not fully"
4906
                                 " synchronized on target node,"
4907
                                 " aborting migrate." % dev.iv_name)
4908

    
4909
    # First get the migration information from the remote node
4910
    result = self.rpc.call_migration_info(source_node, instance)
4911
    msg = result.fail_msg
4912
    if msg:
4913
      log_err = ("Failed fetching source migration information from %s: %s" %
4914
                 (source_node, msg))
4915
      logging.error(log_err)
4916
      raise errors.OpExecError(log_err)
4917

    
4918
    self.migration_info = migration_info = result.payload
4919

    
4920
    # Then switch the disks to master/master mode
4921
    self._EnsureSecondary(target_node)
4922
    self._GoStandalone()
4923
    self._GoReconnect(True)
4924
    self._WaitUntilSync()
4925

    
4926
    self.feedback_fn("* preparing %s to accept the instance" % target_node)
4927
    result = self.rpc.call_accept_instance(target_node,
4928
                                           instance,
4929
                                           migration_info,
4930
                                           self.nodes_ip[target_node])
4931

    
4932
    msg = result.fail_msg
4933
    if msg:
4934
      logging.error("Instance pre-migration failed, trying to revert"
4935
                    " disk status: %s", msg)
4936
      self._AbortMigration()
4937
      self._RevertDiskStatus()
4938
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
4939
                               (instance.name, msg))
4940

    
4941
    self.feedback_fn("* migrating instance to %s" % target_node)
4942
    time.sleep(10)
4943
    result = self.rpc.call_instance_migrate(source_node, instance,
4944
                                            self.nodes_ip[target_node],
4945
                                            self.live)
4946
    msg = result.fail_msg
4947
    if msg:
4948
      logging.error("Instance migration failed, trying to revert"
4949
                    " disk status: %s", msg)
4950
      self._AbortMigration()
4951
      self._RevertDiskStatus()
4952
      raise errors.OpExecError("Could not migrate instance %s: %s" %
4953
                               (instance.name, msg))
4954
    time.sleep(10)
4955

    
4956
    instance.primary_node = target_node
4957
    # distribute new instance config to the other nodes
4958
    self.cfg.Update(instance)
4959

    
4960
    result = self.rpc.call_finalize_migration(target_node,
4961
                                              instance,
4962
                                              migration_info,
4963
                                              True)
4964
    msg = result.fail_msg
4965
    if msg:
4966
      logging.error("Instance migration succeeded, but finalization failed:"
4967
                    " %s" % msg)
4968
      raise errors.OpExecError("Could not finalize instance migration: %s" %
4969
                               msg)
4970

    
4971
    self._EnsureSecondary(source_node)
4972
    self._WaitUntilSync()
4973
    self._GoStandalone()
4974
    self._GoReconnect(False)
4975
    self._WaitUntilSync()
4976

    
4977
    self.feedback_fn("* done")
4978

    
4979
  def Exec(self, feedback_fn):
4980
    """Perform the migration.
4981

4982
    """
4983
    feedback_fn("Migrating instance %s" % self.instance.name)
4984

    
4985
    self.feedback_fn = feedback_fn
4986

    
4987
    self.source_node = self.instance.primary_node
4988
    self.target_node = self.instance.secondary_nodes[0]
4989
    self.all_nodes = [self.source_node, self.target_node]
4990
    self.nodes_ip = {
4991
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
4992
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
4993
      }
4994

    
4995
    if self.cleanup:
4996
      return self._ExecCleanup()
4997
    else:
4998
      return self._ExecMigration()
4999

    
5000

    
5001
def _CreateBlockDev(lu, node, instance, device, force_create,
5002
                    info, force_open):
5003
  """Create a tree of block devices on a given node.
5004

5005
  If this device type has to be created on secondaries, create it and
5006
  all its children.
5007

5008
  If not, just recurse to children keeping the same 'force' value.
5009

5010
  @param lu: the lu on whose behalf we execute
5011
  @param node: the node on which to create the device
5012
  @type instance: L{objects.Instance}
5013
  @param instance: the instance which owns the device
5014
  @type device: L{objects.Disk}
5015
  @param device: the device to create
5016
  @type force_create: boolean
5017
  @param force_create: whether to force creation of this device; this
5018
      will be change to True whenever we find a device which has
5019
      CreateOnSecondary() attribute
5020
  @param info: the extra 'metadata' we should attach to the device
5021
      (this will be represented as a LVM tag)
5022
  @type force_open: boolean
5023
  @param force_open: this parameter will be passes to the
5024
      L{backend.BlockdevCreate} function where it specifies
5025
      whether we run on primary or not, and it affects both
5026
      the child assembly and the device own Open() execution
5027

5028
  """
5029
  if device.CreateOnSecondary():
5030
    force_create = True
5031

    
5032
  if device.children:
5033
    for child in device.children:
5034
      _CreateBlockDev(lu, node, instance, child, force_create,
5035
                      info, force_open)
5036

    
5037
  if not force_create:
5038
    return
5039

    
5040
  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5041

    
5042

    
5043
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5044
  """Create a single block device on a given node.
5045

5046
  This will not recurse over children of the device, so they must be
5047
  created in advance.
5048

5049
  @param lu: the lu on whose behalf we execute
5050
  @param node: the node on which to create the device
5051
  @type instance: L{objects.Instance}
5052
  @param instance: the instance which owns the device
5053
  @type device: L{objects.Disk}
5054
  @param device: the device to create
5055
  @param info: the extra 'metadata' we should attach to the device
5056
      (this will be represented as a LVM tag)
5057
  @type force_open: boolean
5058
  @param force_open: this parameter will be passes to the
5059
      L{backend.BlockdevCreate} function where it specifies
5060
      whether we run on primary or not, and it affects both
5061
      the child assembly and the device own Open() execution
5062

5063
  """
5064
  lu.cfg.SetDiskID(device, node)
5065
  result = lu.rpc.call_blockdev_create(node, device, device.size,
5066
                                       instance.name, force_open, info)
5067
  result.Raise("Can't create block device %s on"
5068
               " node %s for instance %s" % (device, node, instance.name))
5069
  if device.physical_id is None:
5070
    device.physical_id = result.payload
5071

    
5072

    
5073
def _GenerateUniqueNames(lu, exts):
5074
  """Generate a suitable LV name.
5075

5076
  This will generate a logical volume name for the given instance.
5077

5078
  """
5079
  results = []
5080
  for val in exts:
5081
    new_id = lu.cfg.GenerateUniqueID()
5082
    results.append("%s%s" % (new_id, val))
5083
  return results
5084

    
5085

    
5086
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5087
                         p_minor, s_minor):
5088
  """Generate a drbd8 device complete with its children.
5089

5090
  """
5091
  port = lu.cfg.AllocatePort()
5092
  vgname = lu.cfg.GetVGName()
5093
  shared_secret = lu.cfg.GenerateDRBDSecret()
5094
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5095
                          logical_id=(vgname, names[0]))
5096
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5097
                          logical_id=(vgname, names[1]))
5098
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5099
                          logical_id=(primary, secondary, port,
5100
                                      p_minor, s_minor,
5101
                                      shared_secret),
5102
                          children=[dev_data, dev_meta],
5103
                          iv_name=iv_name)
5104
  return drbd_dev
5105

    
5106

    
5107
def _GenerateDiskTemplate(lu, template_name,
5108
                          instance_name, primary_node,
5109
                          secondary_nodes, disk_info,
5110
                          file_storage_dir, file_driver,
5111
                          base_index):
5112
  """Generate the entire disk layout for a given template type.
5113

5114
  """
5115
  #TODO: compute space requirements
5116

    
5117
  vgname = lu.cfg.GetVGName()
5118
  disk_count = len(disk_info)
5119
  disks = []
5120
  if template_name == constants.DT_DISKLESS:
5121
    pass
5122
  elif template_name == constants.DT_PLAIN:
5123
    if len(secondary_nodes) != 0:
5124
      raise errors.ProgrammerError("Wrong template configuration")
5125

    
5126
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5127
                                      for i in range(disk_count)])
5128
    for idx, disk in enumerate(disk_info):
5129
      disk_index = idx + base_index
5130
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5131
                              logical_id=(vgname, names[idx]),
5132
                              iv_name="disk/%d" % disk_index,
5133
                              mode=disk["mode"])
5134
      disks.append(disk_dev)
5135
  elif template_name == constants.DT_DRBD8:
5136
    if len(secondary_nodes) != 1:
5137
      raise errors.ProgrammerError("Wrong template configuration")
5138
    remote_node = secondary_nodes[0]
5139
    minors = lu.cfg.AllocateDRBDMinor(
5140
      [primary_node, remote_node] * len(disk_info), instance_name)
5141

    
5142
    names = []
5143
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5144
                                               for i in range(disk_count)]):
5145
      names.append(lv_prefix + "_data")
5146
      names.append(lv_prefix + "_meta")
5147
    for idx, disk in enumerate(disk_info):
5148
      disk_index = idx + base_index
5149
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5150
                                      disk["size"], names[idx*2:idx*2+2],
5151
                                      "disk/%d" % disk_index,
5152
                                      minors[idx*2], minors[idx*2+1])
5153
      disk_dev.mode = disk["mode"]
5154
      disks.append(disk_dev)
5155
  elif template_name == constants.DT_FILE:
5156
    if len(secondary_nodes) != 0:
5157
      raise errors.ProgrammerError("Wrong template configuration")
5158

    
5159
    for idx, disk in enumerate(disk_info):
5160
      disk_index = idx + base_index
5161
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5162
                              iv_name="disk/%d" % disk_index,
5163
                              logical_id=(file_driver,
5164
                                          "%s/disk%d" % (file_storage_dir,
5165
                                                         disk_index)),
5166
                              mode=disk["mode"])
5167
      disks.append(disk_dev)
5168
  else:
5169
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5170
  return disks
5171

    
5172

    
5173
def _GetInstanceInfoText(instance):
5174
  """Compute that text that should be added to the disk's metadata.
5175

5176
  """
5177
  return "originstname+%s" % instance.name
5178

    
5179

    
5180
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5181
  """Create all disks for an instance.
5182

5183
  This abstracts away some work from AddInstance.
5184

5185
  @type lu: L{LogicalUnit}
5186
  @param lu: the logical unit on whose behalf we execute
5187
  @type instance: L{objects.Instance}
5188
  @param instance: the instance whose disks we should create
5189
  @type to_skip: list
5190
  @param to_skip: list of indices to skip
5191
  @type target_node: string
5192
  @param target_node: if passed, overrides the target node for creation
5193
  @rtype: boolean
5194
  @return: the success of the creation
5195

5196
  """
5197
  info = _GetInstanceInfoText(instance)
5198
  if target_node is None:
5199
    pnode = instance.primary_node
5200
    all_nodes = instance.all_nodes
5201
  else:
5202
    pnode = target_node
5203
    all_nodes = [pnode]
5204

    
5205
  if instance.disk_template == constants.DT_FILE:
5206
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5207
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
5208

    
5209
    result.Raise("Failed to create directory '%s' on"
5210
                 " node %s" % (file_storage_dir, pnode))
5211

    
5212
  # Note: this needs to be kept in sync with adding of disks in
5213
  # LUSetInstanceParams
5214
  for idx, device in enumerate(instance.disks):
5215
    if to_skip and idx in to_skip:
5216
      continue
5217
    logging.info("Creating volume %s for instance %s",
5218
                 device.iv_name, instance.name)
5219
    #HARDCODE
5220
    for node in all_nodes:
5221
      f_create = node == pnode
5222
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
5223

    
5224

    
5225
def _RemoveDisks(lu, instance, target_node=None):
5226
  """Remove all disks for an instance.
5227

5228
  This abstracts away some work from `AddInstance()` and
5229
  `RemoveInstance()`. Note that in case some of the devices couldn't
5230
  be removed, the removal will continue with the other ones (compare
5231
  with `_CreateDisks()`).
5232

5233
  @type lu: L{LogicalUnit}
5234
  @param lu: the logical unit on whose behalf we execute
5235
  @type instance: L{objects.Instance}
5236
  @param instance: the instance whose disks we should remove
5237
  @type target_node: string
5238
  @param target_node: used to override the node on which to remove the disks
5239
  @rtype: boolean
5240
  @return: the success of the removal
5241

5242
  """
5243
  logging.info("Removing block devices for instance %s", instance.name)
5244

    
5245
  all_result = True
5246
  for device in instance.disks:
5247
    if target_node:
5248
      edata = [(target_node, device)]
5249
    else:
5250
      edata = device.ComputeNodeTree(instance.primary_node)
5251
    for node, disk in edata:
5252
      lu.cfg.SetDiskID(disk, node)
5253
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
5254
      if msg:
5255
        lu.LogWarning("Could not remove block device %s on node %s,"
5256
                      " continuing anyway: %s", device.iv_name, node, msg)
5257
        all_result = False
5258

    
5259
  if instance.disk_template == constants.DT_FILE:
5260
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
5261
    if target_node:
5262
      tgt = target_node
5263
    else:
5264
      tgt = instance.primary_node
5265
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
5266
    if result.fail_msg:
5267
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
5268
                    file_storage_dir, instance.primary_node, result.fail_msg)
5269
      all_result = False
5270

    
5271
  return all_result
5272

    
5273

    
5274
def _ComputeDiskSize(disk_template, disks):
5275
  """Compute disk size requirements in the volume group
5276

5277
  """
5278
  # Required free disk space as a function of disk and swap space
5279
  req_size_dict = {
5280
    constants.DT_DISKLESS: None,
5281
    constants.DT_PLAIN: sum(d["size"] for d in disks),
5282
    # 128 MB are added for drbd metadata for each disk
5283
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
5284
    constants.DT_FILE: None,
5285
  }
5286

    
5287
  if disk_template not in req_size_dict:
5288
    raise errors.ProgrammerError("Disk template '%s' size requirement"
5289
                                 " is unknown" %  disk_template)
5290

    
5291
  return req_size_dict[disk_template]
5292

    
5293

    
5294
def _CheckHVParams(lu, nodenames, hvname, hvparams):
5295
  """Hypervisor parameter validation.
5296

5297
  This function abstract the hypervisor parameter validation to be
5298
  used in both instance create and instance modify.
5299

5300
  @type lu: L{LogicalUnit}
5301
  @param lu: the logical unit for which we check
5302
  @type nodenames: list
5303
  @param nodenames: the list of nodes on which we should check
5304
  @type hvname: string
5305
  @param hvname: the name of the hypervisor we should use
5306
  @type hvparams: dict
5307
  @param hvparams: the parameters which we need to check
5308
  @raise errors.OpPrereqError: if the parameters are not valid
5309

5310
  """
5311
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5312
                                                  hvname,
5313
                                                  hvparams)
5314
  for node in nodenames:
5315
    info = hvinfo[node]
5316
    if info.offline:
5317
      continue
5318
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5319

    
5320

    
5321
class LUCreateInstance(LogicalUnit):
5322
  """Create an instance.
5323

5324
  """
5325
  HPATH = "instance-add"
5326
  HTYPE = constants.HTYPE_INSTANCE
5327
  _OP_REQP = ["instance_name", "disks", "disk_template",
5328
              "mode", "start",
5329
              "wait_for_sync", "ip_check", "nics",
5330
              "hvparams", "beparams"]
5331
  REQ_BGL = False
5332

    
5333
  def _ExpandNode(self, node):
5334
    """Expands and checks one node name.
5335

5336
    """
5337
    node_full = self.cfg.ExpandNodeName(node)
5338
    if node_full is None:
5339
      raise errors.OpPrereqError("Unknown node %s" % node)
5340
    return node_full
5341

    
5342
  def ExpandNames(self):
5343
    """ExpandNames for CreateInstance.
5344

5345
    Figure out the right locks for instance creation.
5346

5347
    """
5348
    self.needed_locks = {}
5349

    
5350
    # set optional parameters to none if they don't exist
5351
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5352
      if not hasattr(self.op, attr):
5353
        setattr(self.op, attr, None)
5354

    
5355
    # cheap checks, mostly valid constants given
5356

    
5357
    # verify creation mode
5358
    if self.op.mode not in (constants.INSTANCE_CREATE,
5359
                            constants.INSTANCE_IMPORT):
5360
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5361
                                 self.op.mode)
5362

    
5363
    # disk template and mirror node verification
5364
    if self.op.disk_template not in constants.DISK_TEMPLATES:
5365
      raise errors.OpPrereqError("Invalid disk template name")
5366

    
5367
    if self.op.hypervisor is None:
5368
      self.op.hypervisor = self.cfg.GetHypervisorType()
5369

    
5370
    cluster = self.cfg.GetClusterInfo()
5371
    enabled_hvs = cluster.enabled_hypervisors
5372
    if self.op.hypervisor not in enabled_hvs:
5373
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5374
                                 " cluster (%s)" % (self.op.hypervisor,
5375
                                  ",".join(enabled_hvs)))
5376

    
5377
    # check hypervisor parameter syntax (locally)
5378
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5379
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5380
                                  self.op.hvparams)
5381
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5382
    hv_type.CheckParameterSyntax(filled_hvp)
5383
    self.hv_full = filled_hvp
5384

    
5385
    # fill and remember the beparams dict
5386
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5387
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5388
                                    self.op.beparams)
5389

    
5390
    #### instance parameters check
5391

    
5392
    # instance name verification
5393
    hostname1 = utils.HostInfo(self.op.instance_name)
5394
    self.op.instance_name = instance_name = hostname1.name
5395

    
5396
    # this is just a preventive check, but someone might still add this
5397
    # instance in the meantime, and creation will fail at lock-add time
5398
    if instance_name in self.cfg.GetInstanceList():
5399
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5400
                                 instance_name)
5401

    
5402
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5403

    
5404
    # NIC buildup
5405
    self.nics = []
5406
    for idx, nic in enumerate(self.op.nics):
5407
      nic_mode_req = nic.get("mode", None)
5408
      nic_mode = nic_mode_req
5409
      if nic_mode is None:
5410
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5411

    
5412
      # in routed mode, for the first nic, the default ip is 'auto'
5413
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5414
        default_ip_mode = constants.VALUE_AUTO
5415
      else:
5416
        default_ip_mode = constants.VALUE_NONE
5417

    
5418
      # ip validity checks
5419
      ip = nic.get("ip", default_ip_mode)
5420
      if ip is None or ip.lower() == constants.VALUE_NONE:
5421
        nic_ip = None
5422
      elif ip.lower() == constants.VALUE_AUTO:
5423
        nic_ip = hostname1.ip
5424
      else:
5425
        if not utils.IsValidIP(ip):
5426
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5427
                                     " like a valid IP" % ip)
5428
        nic_ip = ip
5429

    
5430
      # TODO: check the ip for uniqueness !!
5431
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5432
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
5433

    
5434
      # MAC address verification
5435
      mac = nic.get("mac", constants.VALUE_AUTO)
5436
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5437
        if not utils.IsValidMac(mac.lower()):
5438
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5439
                                     mac)
5440
        else:
5441
          # or validate/reserve the current one
5442
          if self.cfg.IsMacInUse(mac):
5443
            raise errors.OpPrereqError("MAC address %s already in use"
5444
                                       " in cluster" % mac)
5445

    
5446
      # bridge verification
5447
      bridge = nic.get("bridge", None)
5448
      link = nic.get("link", None)
5449
      if bridge and link:
5450
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5451
                                   " at the same time")
5452
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5453
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
5454
      elif bridge:
5455
        link = bridge
5456

    
5457
      nicparams = {}
5458
      if nic_mode_req:
5459
        nicparams[constants.NIC_MODE] = nic_mode_req
5460
      if link:
5461
        nicparams[constants.NIC_LINK] = link
5462

    
5463
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5464
                                      nicparams)
5465
      objects.NIC.CheckParameterSyntax(check_params)
5466
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5467

    
5468
    # disk checks/pre-build
5469
    self.disks = []
5470
    for disk in self.op.disks:
5471
      mode = disk.get("mode", constants.DISK_RDWR)
5472
      if mode not in constants.DISK_ACCESS_SET:
5473
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5474
                                   mode)
5475
      size = disk.get("size", None)
5476
      if size is None:
5477
        raise errors.OpPrereqError("Missing disk size")
5478
      try:
5479
        size = int(size)
5480
      except ValueError:
5481
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
5482
      self.disks.append({"size": size, "mode": mode})
5483

    
5484
    # used in CheckPrereq for ip ping check
5485
    self.check_ip = hostname1.ip
5486

    
5487
    # file storage checks
5488
    if (self.op.file_driver and
5489
        not self.op.file_driver in constants.FILE_DRIVER):
5490
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
5491
                                 self.op.file_driver)
5492

    
5493
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5494
      raise errors.OpPrereqError("File storage directory path not absolute")
5495

    
5496
    ### Node/iallocator related checks
5497
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
5498
      raise errors.OpPrereqError("One and only one of iallocator and primary"
5499
                                 " node must be given")
5500

    
5501
    if self.op.iallocator:
5502
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5503
    else:
5504
      self.op.pnode = self._ExpandNode(self.op.pnode)
5505
      nodelist = [self.op.pnode]
5506
      if self.op.snode is not None:
5507
        self.op.snode = self._ExpandNode(self.op.snode)
5508
        nodelist.append(self.op.snode)
5509
      self.needed_locks[locking.LEVEL_NODE] = nodelist
5510

    
5511
    # in case of import lock the source node too
5512
    if self.op.mode == constants.INSTANCE_IMPORT:
5513
      src_node = getattr(self.op, "src_node", None)
5514
      src_path = getattr(self.op, "src_path", None)
5515

    
5516
      if src_path is None:
5517
        self.op.src_path = src_path = self.op.instance_name
5518

    
5519
      if src_node is None:
5520
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5521
        self.op.src_node = None
5522
        if os.path.isabs(src_path):
5523
          raise errors.OpPrereqError("Importing an instance from an absolute"
5524
                                     " path requires a source node option.")
5525
      else:
5526
        self.op.src_node = src_node = self._ExpandNode(src_node)
5527
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5528
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
5529
        if not os.path.isabs(src_path):
5530
          self.op.src_path = src_path = \
5531
            os.path.join(constants.EXPORT_DIR, src_path)
5532

    
5533
    else: # INSTANCE_CREATE
5534
      if getattr(self.op, "os_type", None) is None:
5535
        raise errors.OpPrereqError("No guest OS specified")
5536

    
5537
  def _RunAllocator(self):
5538
    """Run the allocator based on input opcode.
5539

5540
    """
5541
    nics = [n.ToDict() for n in self.nics]
5542
    ial = IAllocator(self.cfg, self.rpc,
5543
                     mode=constants.IALLOCATOR_MODE_ALLOC,
5544
                     name=self.op.instance_name,
5545
                     disk_template=self.op.disk_template,
5546
                     tags=[],
5547
                     os=self.op.os_type,
5548
                     vcpus=self.be_full[constants.BE_VCPUS],
5549
                     mem_size=self.be_full[constants.BE_MEMORY],
5550
                     disks=self.disks,
5551
                     nics=nics,
5552
                     hypervisor=self.op.hypervisor,
5553
                     )
5554

    
5555
    ial.Run(self.op.iallocator)
5556

    
5557
    if not ial.success:
5558
      raise errors.OpPrereqError("Can't compute nodes using"
5559
                                 " iallocator '%s': %s" % (self.op.iallocator,
5560
                                                           ial.info))
5561
    if len(ial.nodes) != ial.required_nodes:
5562
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5563
                                 " of nodes (%s), required %s" %
5564
                                 (self.op.iallocator, len(ial.nodes),
5565
                                  ial.required_nodes))
5566
    self.op.pnode = ial.nodes[0]
5567
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5568
                 self.op.instance_name, self.op.iallocator,
5569
                 ", ".join(ial.nodes))
5570
    if ial.required_nodes == 2:
5571
      self.op.snode = ial.nodes[1]
5572

    
5573
  def BuildHooksEnv(self):
5574
    """Build hooks env.
5575

5576
    This runs on master, primary and secondary nodes of the instance.
5577

5578
    """
5579
    env = {
5580
      "ADD_MODE": self.op.mode,
5581
      }
5582
    if self.op.mode == constants.INSTANCE_IMPORT:
5583
      env["SRC_NODE"] = self.op.src_node
5584
      env["SRC_PATH"] = self.op.src_path
5585
      env["SRC_IMAGES"] = self.src_images
5586

    
5587
    env.update(_BuildInstanceHookEnv(
5588
      name=self.op.instance_name,
5589
      primary_node=self.op.pnode,
5590
      secondary_nodes=self.secondaries,
5591
      status=self.op.start,
5592
      os_type=self.op.os_type,
5593
      memory=self.be_full[constants.BE_MEMORY],
5594
      vcpus=self.be_full[constants.BE_VCPUS],
5595
      nics=_NICListToTuple(self, self.nics),
5596
      disk_template=self.op.disk_template,
5597
      disks=[(d["size"], d["mode"]) for d in self.disks],
5598
      bep=self.be_full,
5599
      hvp=self.hv_full,
5600
      hypervisor_name=self.op.hypervisor,
5601
    ))
5602

    
5603
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5604
          self.secondaries)
5605
    return env, nl, nl
5606

    
5607

    
5608
  def CheckPrereq(self):
5609
    """Check prerequisites.
5610

5611
    """
5612
    if (not self.cfg.GetVGName() and
5613
        self.op.disk_template not in constants.DTS_NOT_LVM):
5614
      raise errors.OpPrereqError("Cluster does not support lvm-based"
5615
                                 " instances")
5616

    
5617
    if self.op.mode == constants.INSTANCE_IMPORT:
5618
      src_node = self.op.src_node
5619
      src_path = self.op.src_path
5620

    
5621
      if src_node is None:
5622
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5623
        exp_list = self.rpc.call_export_list(locked_nodes)
5624
        found = False
5625
        for node in exp_list:
5626
          if exp_list[node].fail_msg:
5627
            continue
5628
          if src_path in exp_list[node].payload:
5629
            found = True
5630
            self.op.src_node = src_node = node
5631
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5632
                                                       src_path)
5633
            break
5634
        if not found:
5635
          raise errors.OpPrereqError("No export found for relative path %s" %
5636
                                      src_path)
5637

    
5638
      _CheckNodeOnline(self, src_node)
5639
      result = self.rpc.call_export_info(src_node, src_path)
5640
      result.Raise("No export or invalid export found in dir %s" % src_path)
5641

    
5642
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5643
      if not export_info.has_section(constants.INISECT_EXP):
5644
        raise errors.ProgrammerError("Corrupted export config")
5645

    
5646
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
5647
      if (int(ei_version) != constants.EXPORT_VERSION):
5648
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
5649
                                   (ei_version, constants.EXPORT_VERSION))
5650

    
5651
      # Check that the new instance doesn't have less disks than the export
5652
      instance_disks = len(self.disks)
5653
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
5654
      if instance_disks < export_disks:
5655
        raise errors.OpPrereqError("Not enough disks to import."
5656
                                   " (instance: %d, export: %d)" %
5657
                                   (instance_disks, export_disks))
5658

    
5659
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
5660
      disk_images = []
5661
      for idx in range(export_disks):
5662
        option = 'disk%d_dump' % idx
5663
        if export_info.has_option(constants.INISECT_INS, option):
5664
          # FIXME: are the old os-es, disk sizes, etc. useful?
5665
          export_name = export_info.get(constants.INISECT_INS, option)
5666
          image = os.path.join(src_path, export_name)
5667
          disk_images.append(image)
5668
        else:
5669
          disk_images.append(False)
5670

    
5671
      self.src_images = disk_images
5672

    
5673
      old_name = export_info.get(constants.INISECT_INS, 'name')
5674
      # FIXME: int() here could throw a ValueError on broken exports
5675
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
5676
      if self.op.instance_name == old_name:
5677
        for idx, nic in enumerate(self.nics):
5678
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
5679
            nic_mac_ini = 'nic%d_mac' % idx
5680
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
5681

    
5682
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
5683
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
5684
    if self.op.start and not self.op.ip_check:
5685
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5686
                                 " adding an instance in start mode")
5687

    
5688
    if self.op.ip_check:
5689
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5690
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5691
                                   (self.check_ip, self.op.instance_name))
5692

    
5693
    #### mac address generation
5694
    # By generating here the mac address both the allocator and the hooks get
5695
    # the real final mac address rather than the 'auto' or 'generate' value.
5696
    # There is a race condition between the generation and the instance object
5697
    # creation, which means that we know the mac is valid now, but we're not
5698
    # sure it will be when we actually add the instance. If things go bad
5699
    # adding the instance will abort because of a duplicate mac, and the
5700
    # creation job will fail.
5701
    for nic in self.nics:
5702
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5703
        nic.mac = self.cfg.GenerateMAC()
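        # With the cluster MAC prefix this yields a concrete value such as
        # aa:00:00:4f:12:d6 (illustrative), so the allocator and the hooks
        # never see the 'auto'/'generate' placeholders.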
5704

    
5705
    #### allocator run
5706

    
5707
    if self.op.iallocator is not None:
5708
      self._RunAllocator()
5709

    
5710
    #### node related checks
5711

    
5712
    # check primary node
5713
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
5714
    assert self.pnode is not None, \
5715
      "Cannot retrieve locked node %s" % self.op.pnode
5716
    if pnode.offline:
5717
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
5718
                                 pnode.name)
5719
    if pnode.drained:
5720
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
5721
                                 pnode.name)
5722

    
5723
    self.secondaries = []
5724

    
5725
    # mirror node verification
5726
    if self.op.disk_template in constants.DTS_NET_MIRROR:
5727
      if self.op.snode is None:
5728
        raise errors.OpPrereqError("The networked disk templates need"
5729
                                   " a mirror node")
5730
      if self.op.snode == pnode.name:
5731
        raise errors.OpPrereqError("The secondary node cannot be"
5732
                                   " the primary node.")
5733
      _CheckNodeOnline(self, self.op.snode)
5734
      _CheckNodeNotDrained(self, self.op.snode)
5735
      self.secondaries.append(self.op.snode)
5736

    
5737
    nodenames = [pnode.name] + self.secondaries
5738

    
5739
    req_size = _ComputeDiskSize(self.op.disk_template,
5740
                                self.disks)
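    # Note: for mirrored templates such as drbd the computed size also covers
    # the per-disk DRBD metadata overhead, so the free-space check below is
    # slightly stricter than the sum of the user-requested disk sizes.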
5741

    
5742
    # Check lv size requirements
5743
    if req_size is not None:
5744
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5745
                                         self.op.hypervisor)
5746
      for node in nodenames:
5747
        info = nodeinfo[node]
5748
        info.Raise("Cannot get current information from node %s" % node)
5749
        info = info.payload
5750
        vg_free = info.get('vg_free', None)
5751
        if not isinstance(vg_free, int):
5752
          raise errors.OpPrereqError("Can't compute free disk space on"
5753
                                     " node %s" % node)
5754
        if req_size > vg_free:
5755
          raise errors.OpPrereqError("Not enough disk space on target node %s."
5756
                                     " %d MB available, %d MB required" %
5757
                                     (node, vg_free, req_size))
5758

    
5759
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
5760

    
5761
    # os verification
5762
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
5763
    result.Raise("OS '%s' not in supported os list for primary node %s" %
5764
                 (self.op.os_type, pnode.name), prereq=True)
5765

    
5766
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
5767

    
5768
    # memory check on primary node
5769
    if self.op.start:
5770
      _CheckNodeFreeMemory(self, self.pnode.name,
5771
                           "creating instance %s" % self.op.instance_name,
5772
                           self.be_full[constants.BE_MEMORY],
5773
                           self.op.hypervisor)
5774

    
5775
    self.dry_run_result = list(nodenames)
5776

    
5777
  def Exec(self, feedback_fn):
5778
    """Create and add the instance to the cluster.
5779

5780
    """
5781
    instance = self.op.instance_name
5782
    pnode_name = self.pnode.name
5783

    
5784
    ht_kind = self.op.hypervisor
5785
    if ht_kind in constants.HTS_REQ_PORT:
5786
      network_port = self.cfg.AllocatePort()
5787
    else:
5788
      network_port = None
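    # HTS_REQ_PORT lists the hypervisors that need a cluster-wide TCP port
    # reserved per instance (typically for the VNC console); the other
    # hypervisors get no port allocated at all.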
5789

    
5790
    ##if self.op.vnc_bind_address is None:
5791
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
5792

    
5793
    # this is needed because os.path.join does not accept None arguments
5794
    if self.op.file_storage_dir is None:
5795
      string_file_storage_dir = ""
5796
    else:
5797
      string_file_storage_dir = self.op.file_storage_dir
5798

    
5799
    # build the full file storage dir path
5800
    file_storage_dir = os.path.normpath(os.path.join(
5801
                                        self.cfg.GetFileStorageDir(),
5802
                                        string_file_storage_dir, instance))
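    # e.g. with the default file storage dir this resolves to something like
    # /srv/ganeti/file-storage/<subdir>/<instance name> (illustrative path)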
5803

    
5804

    
5805
    disks = _GenerateDiskTemplate(self,
5806
                                  self.op.disk_template,
5807
                                  instance, pnode_name,
5808
                                  self.secondaries,
5809
                                  self.disks,
5810
                                  file_storage_dir,
5811
                                  self.op.file_driver,
5812
                                  0)
5813

    
5814
    iobj = objects.Instance(name=instance, os=self.op.os_type,
5815
                            primary_node=pnode_name,
5816
                            nics=self.nics, disks=disks,
5817
                            disk_template=self.op.disk_template,
5818
                            admin_up=False,
5819
                            network_port=network_port,
5820
                            beparams=self.op.beparams,
5821
                            hvparams=self.op.hvparams,
5822
                            hypervisor=self.op.hypervisor,
5823
                            )
5824

    
5825
    feedback_fn("* creating instance disks...")
5826
    try:
5827
      _CreateDisks(self, iobj)
5828
    except errors.OpExecError:
5829
      self.LogWarning("Device creation failed, reverting...")
5830
      try:
5831
        _RemoveDisks(self, iobj)
5832
      finally:
5833
        self.cfg.ReleaseDRBDMinors(instance)
5834
        raise
5835

    
5836
    feedback_fn("adding instance %s to cluster config" % instance)
5837

    
5838
    self.cfg.AddInstance(iobj)
5839
    # Declare that we don't want to remove the instance lock anymore, as we've
5840
    # added the instance to the config
5841
    del self.remove_locks[locking.LEVEL_INSTANCE]
5842
    # Unlock all the nodes
5843
    if self.op.mode == constants.INSTANCE_IMPORT:
5844
      nodes_keep = [self.op.src_node]
5845
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5846
                       if node != self.op.src_node]
5847
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
5848
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5849
    else:
5850
      self.context.glm.release(locking.LEVEL_NODE)
5851
      del self.acquired_locks[locking.LEVEL_NODE]
5852

    
5853
    if self.op.wait_for_sync:
5854
      disk_abort = not _WaitForSync(self, iobj)
5855
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
5856
      # make sure the disks are not degraded (still sync-ing is ok)
5857
      time.sleep(15)
5858
      feedback_fn("* checking mirrors status")
5859
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
5860
    else:
5861
      disk_abort = False
5862

    
5863
    if disk_abort:
5864
      _RemoveDisks(self, iobj)
5865
      self.cfg.RemoveInstance(iobj.name)
5866
      # Make sure the instance lock gets removed
5867
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5868
      raise errors.OpExecError("There are some degraded disks for"
5869
                               " this instance")
5870

    
5871
    feedback_fn("creating os for instance %s on node %s" %
5872
                (instance, pnode_name))
5873

    
5874
    if iobj.disk_template != constants.DT_DISKLESS:
5875
      if self.op.mode == constants.INSTANCE_CREATE:
5876
        feedback_fn("* running the instance OS create scripts...")
5877
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
5878
        result.Raise("Could not add os for instance %s"
5879
                     " on node %s" % (instance, pnode_name))
5880

    
5881
      elif self.op.mode == constants.INSTANCE_IMPORT:
5882
        feedback_fn("* running the instance OS import scripts...")
5883
        src_node = self.op.src_node
5884
        src_images = self.src_images
5885
        cluster_name = self.cfg.GetClusterName()
5886
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5887
                                                         src_node, src_images,
5888
                                                         cluster_name)
5889
        msg = import_result.fail_msg
5890
        if msg:
5891
          self.LogWarning("Error while importing the disk images for instance"
5892
                          " %s on node %s: %s" % (instance, pnode_name, msg))
5893
      else:
5894
        # also checked in the prereq part
5895
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5896
                                     % self.op.mode)
5897

    
5898
    if self.op.start:
5899
      iobj.admin_up = True
5900
      self.cfg.Update(iobj)
5901
      logging.info("Starting instance %s on node %s", instance, pnode_name)
5902
      feedback_fn("* starting instance...")
5903
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5904
      result.Raise("Could not start instance")
5905

    
5906
    return list(iobj.all_nodes)
5907

    
5908

    
5909
class LUConnectConsole(NoHooksLU):
5910
  """Connect to an instance's console.
5911

5912
  This is somewhat special in that it returns the command line that
5913
  you need to run on the master node in order to connect to the
5914
  console.
5915

5916
  """
5917
  _OP_REQP = ["instance_name"]
5918
  REQ_BGL = False
5919

    
5920
  def ExpandNames(self):
5921
    self._ExpandAndLockInstance()
5922

    
5923
  def CheckPrereq(self):
5924
    """Check prerequisites.
5925

5926
    This checks that the instance is in the cluster.
5927

5928
    """
5929
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5930
    assert self.instance is not None, \
5931
      "Cannot retrieve locked instance %s" % self.op.instance_name
5932
    _CheckNodeOnline(self, self.instance.primary_node)
5933

    
5934
  def Exec(self, feedback_fn):
5935
    """Connect to the console of an instance
5936

5937
    """
5938
    instance = self.instance
5939
    node = instance.primary_node
5940

    
5941
    node_insts = self.rpc.call_instance_list([node],
5942
                                             [instance.hypervisor])[node]
5943
    node_insts.Raise("Can't get node information from %s" % node)
5944

    
5945
    if instance.name not in node_insts.payload:
5946
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5947

    
5948
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5949

    
5950
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5951
    cluster = self.cfg.GetClusterInfo()
5952
    # beparams and hvparams are passed separately, to avoid editing the
5953
    # instance and then saving the defaults in the instance itself.
5954
    hvparams = cluster.FillHV(instance)
5955
    beparams = cluster.FillBE(instance)
5956
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5957

    
5958
    # build ssh cmdline
5959
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
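    # The caller is expected to exec the returned command itself; for a Xen
    # instance it is roughly equivalent to running
    #   ssh -t root@<primary node> '<hypervisor console command>'
    # from the master node (exact arguments depend on the hypervisor).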
5960

    
5961

    
5962
class LUReplaceDisks(LogicalUnit):
5963
  """Replace the disks of an instance.
5964

5965
  """
5966
  HPATH = "mirrors-replace"
5967
  HTYPE = constants.HTYPE_INSTANCE
5968
  _OP_REQP = ["instance_name", "mode", "disks"]
5969
  REQ_BGL = False
5970

    
5971
  def CheckArguments(self):
5972
    if not hasattr(self.op, "remote_node"):
5973
      self.op.remote_node = None
5974
    if not hasattr(self.op, "iallocator"):
5975
      self.op.iallocator = None
5976

    
5977
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
5978
                                  self.op.iallocator)
5979

    
5980
  def ExpandNames(self):
5981
    self._ExpandAndLockInstance()
5982

    
5983
    if self.op.iallocator is not None:
5984
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5985

    
5986
    elif self.op.remote_node is not None:
5987
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5988
      if remote_node is None:
5989
        raise errors.OpPrereqError("Node '%s' not known" %
5990
                                   self.op.remote_node)
5991

    
5992
      self.op.remote_node = remote_node
5993

    
5994
      # Warning: do not remove the locking of the new secondary here
5995
      # unless DRBD8.AddChildren is changed to work in parallel;
5996
      # currently it doesn't since parallel invocations of
5997
      # FindUnusedMinor will conflict
5998
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5999
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6000

    
6001
    else:
6002
      self.needed_locks[locking.LEVEL_NODE] = []
6003
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6004

    
6005
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
6006
                                   self.op.iallocator, self.op.remote_node,
6007
                                   self.op.disks)
6008

    
6009
    self.tasklets = [self.replacer]
6010

    
6011
  def DeclareLocks(self, level):
6012
    # If we're not already locking all nodes in the set we have to declare the
6013
    # instance's primary/secondary nodes.
6014
    if (level == locking.LEVEL_NODE and
6015
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6016
      self._LockInstancesNodes()
6017

    
6018
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    instance = self.replacer.instance
    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": instance.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, instance))
    nl = [
      self.cfg.GetMasterNode(),
      instance.primary_node,
      ]
    if self.op.remote_node is not None:
      nl.append(self.op.remote_node)
    return env, nl, nl
6038

    
6039

    
6040
class LUEvacuateNode(LogicalUnit):
6041
  """Relocate the secondary instances from a node.
6042

6043
  """
6044
  HPATH = "node-evacuate"
6045
  HTYPE = constants.HTYPE_NODE
6046
  _OP_REQP = ["node_name"]
6047
  REQ_BGL = False
6048

    
6049
  def CheckArguments(self):
6050
    if not hasattr(self.op, "remote_node"):
6051
      self.op.remote_node = None
6052
    if not hasattr(self.op, "iallocator"):
6053
      self.op.iallocator = None
6054

    
6055
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6056
                                  self.op.remote_node,
6057
                                  self.op.iallocator)
6058

    
6059
  def ExpandNames(self):
6060
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
6061
    if self.op.node_name is None:
6062
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
6063

    
6064
    self.needed_locks = {}
6065

    
6066
    # Declare node locks
6067
    if self.op.iallocator is not None:
6068
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6069

    
6070
    elif self.op.remote_node is not None:
6071
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
6072
      if remote_node is None:
6073
        raise errors.OpPrereqError("Node '%s' not known" %
6074
                                   self.op.remote_node)
6075

    
6076
      self.op.remote_node = remote_node
6077

    
6078
      # Warning: do not remove the locking of the new secondary here
6079
      # unless DRBD8.AddChildren is changed to work in parallel;
6080
      # currently it doesn't since parallel invocations of
6081
      # FindUnusedMinor will conflict
6082
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6083
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6084

    
6085
    else:
6086
      raise errors.OpPrereqError("Invalid parameters")
6087

    
6088
    # Create tasklets for replacing disks for all secondary instances on this
6089
    # node
6090
    names = []
6091
    tasklets = []
6092

    
6093
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6094
      logging.debug("Replacing disks for instance %s", inst.name)
6095
      names.append(inst.name)
6096

    
6097
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6098
                                self.op.iallocator, self.op.remote_node, [])
6099
      tasklets.append(replacer)
6100

    
6101
    self.tasklets = tasklets
6102
    self.instance_names = names
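    # Example scenario: evacuating a node that is the secondary for two DRBD
    # instances yields two TLReplaceDisks tasklets, each running in "change
    # secondary" mode against the node/iallocator chosen above.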
6103

    
6104
    # Declare instance locks
6105
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6106

    
6107
  def DeclareLocks(self, level):
6108
    # If we're not already locking all nodes in the set we have to declare the
6109
    # instance's primary/secondary nodes.
6110
    if (level == locking.LEVEL_NODE and
6111
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6112
      self._LockInstancesNodes()
6113

    
6114
  def BuildHooksEnv(self):
6115
    """Build hooks env.
6116

6117
    This runs on the master, the primary and all the secondaries.
6118

6119
    """
6120
    env = {
6121
      "NODE_NAME": self.op.node_name,
6122
      }
6123

    
6124
    nl = [self.cfg.GetMasterNode()]
6125

    
6126
    if self.op.remote_node is not None:
6127
      env["NEW_SECONDARY"] = self.op.remote_node
6128
      nl.append(self.op.remote_node)
6129

    
6130
    return (env, nl, nl)
6131

    
6132

    
6133
class TLReplaceDisks(Tasklet):
6134
  """Replaces disks for an instance.
6135

6136
  Note: Locking is not within the scope of this class.
6137

6138
  """
6139
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6140
               disks):
6141
    """Initializes this class.
6142

6143
    """
6144
    Tasklet.__init__(self, lu)
6145

    
6146
    # Parameters
6147
    self.instance_name = instance_name
6148
    self.mode = mode
6149
    self.iallocator_name = iallocator_name
6150
    self.remote_node = remote_node
6151
    self.disks = disks
6152

    
6153
    # Runtime data
6154
    self.instance = None
6155
    self.new_node = None
6156
    self.target_node = None
6157
    self.other_node = None
6158
    self.remote_node_info = None
6159
    self.node_secondary_ip = None
6160

    
6161
  @staticmethod
6162
  def CheckArguments(mode, remote_node, iallocator):
6163
    """Helper function for users of this class.
6164

6165
    """
6166
    # check for valid parameter combination
6167
    if mode == constants.REPLACE_DISK_CHG:
6168
      if remote_node is None and iallocator is None:
6169
        raise errors.OpPrereqError("When changing the secondary either an"
6170
                                   " iallocator script must be used or the"
6171
                                   " new node given")
6172

    
6173
      if remote_node is not None and iallocator is not None:
6174
        raise errors.OpPrereqError("Give either the iallocator or the new"
6175
                                   " secondary, not both")
6176

    
6177
    elif remote_node is not None or iallocator is not None:
6178
      # Not replacing the secondary
6179
      raise errors.OpPrereqError("The iallocator and new node options can"
6180
                                 " only be used when changing the"
6181
                                 " secondary node")
6182

    
6183
  @staticmethod
6184
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6185
    """Compute a new secondary node using an IAllocator.
6186

6187
    """
6188
    ial = IAllocator(lu.cfg, lu.rpc,
6189
                     mode=constants.IALLOCATOR_MODE_RELOC,
6190
                     name=instance_name,
6191
                     relocate_from=relocate_from)
6192

    
6193
    ial.Run(iallocator_name)
6194

    
6195
    if not ial.success:
6196
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6197
                                 " %s" % (iallocator_name, ial.info))
6198

    
6199
    if len(ial.nodes) != ial.required_nodes:
6200
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6201
                                 " of nodes (%s), required %s" %
6202
                                 (len(ial.nodes), ial.required_nodes))
6203

    
6204
    remote_node_name = ial.nodes[0]
6205

    
6206
    lu.LogInfo("Selected new secondary for instance '%s': %s",
6207
               instance_name, remote_node_name)
6208

    
6209
    return remote_node_name
6210

    
6211
  def _FindFaultyDisks(self, node_name):
6212
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6213
                                    node_name, True)
6214

    
6215
  def CheckPrereq(self):
6216
    """Check prerequisites.
6217

6218
    This checks that the instance is in the cluster.
6219

6220
    """
6221
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
6222
    assert self.instance is not None, \
6223
      "Cannot retrieve locked instance %s" % self.instance_name
6224

    
6225
    if self.instance.disk_template != constants.DT_DRBD8:
6226
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6227
                                 " instances")
6228

    
6229
    if len(self.instance.secondary_nodes) != 1:
6230
      raise errors.OpPrereqError("The instance has a strange layout,"
6231
                                 " expected one secondary but found %d" %
6232
                                 len(self.instance.secondary_nodes))
6233

    
6234
    secondary_node = self.instance.secondary_nodes[0]
6235

    
6236
    if self.iallocator_name is None:
6237
      remote_node = self.remote_node
6238
    else:
6239
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6240
                                       self.instance.name, secondary_node)
6241

    
6242
    if remote_node is not None:
6243
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6244
      assert self.remote_node_info is not None, \
6245
        "Cannot retrieve locked node %s" % remote_node
6246
    else:
6247
      self.remote_node_info = None
6248

    
6249
    if remote_node == self.instance.primary_node:
6250
      raise errors.OpPrereqError("The specified node is the primary node of"
6251
                                 " the instance.")
6252

    
6253
    if remote_node == secondary_node:
6254
      raise errors.OpPrereqError("The specified node is already the"
6255
                                 " secondary node of the instance.")
6256

    
6257
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6258
                                    constants.REPLACE_DISK_CHG):
6259
      raise errors.OpPrereqError("Cannot specify disks to be replaced")
6260

    
6261
    if self.mode == constants.REPLACE_DISK_AUTO:
6262
      faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
6263
      faulty_secondary = self._FindFaultyDisks(secondary_node)
6264

    
6265
      if faulty_primary and faulty_secondary:
6266
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6267
                                   " one node and can not be repaired"
6268
                                   " automatically" % self.instance_name)
6269

    
6270
      if faulty_primary:
6271
        self.disks = faulty_primary
6272
        self.target_node = self.instance.primary_node
6273
        self.other_node = secondary_node
6274
        check_nodes = [self.target_node, self.other_node]
6275
      elif faulty_secondary:
6276
        self.disks = faulty_secondary
6277
        self.target_node = secondary_node
6278
        self.other_node = self.instance.primary_node
6279
        check_nodes = [self.target_node, self.other_node]
6280
      else:
6281
        self.disks = []
6282
        check_nodes = []
6283

    
6284
    else:
6285
      # Non-automatic modes
6286
      if self.mode == constants.REPLACE_DISK_PRI:
6287
        self.target_node = self.instance.primary_node
6288
        self.other_node = secondary_node
6289
        check_nodes = [self.target_node, self.other_node]
6290

    
6291
      elif self.mode == constants.REPLACE_DISK_SEC:
6292
        self.target_node = secondary_node
6293
        self.other_node = self.instance.primary_node
6294
        check_nodes = [self.target_node, self.other_node]
6295

    
6296
      elif self.mode == constants.REPLACE_DISK_CHG:
6297
        self.new_node = remote_node
6298
        self.other_node = self.instance.primary_node
6299
        self.target_node = secondary_node
6300
        check_nodes = [self.new_node, self.other_node]
6301

    
6302
        _CheckNodeNotDrained(self.lu, remote_node)
6303

    
6304
      else:
6305
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6306
                                     self.mode)
6307

    
6308
      # If not specified all disks should be replaced
6309
      if not self.disks:
6310
        self.disks = range(len(self.instance.disks))
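      # At this point target_node is the node whose storage will be rebuilt
      # and other_node its (assumed healthy) peer: primary/secondary for
      # REPLACE_DISK_PRI, the reverse for REPLACE_DISK_SEC, while
      # REPLACE_DISK_CHG additionally records the future secondary in new_node.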
6311

    
6312
    for node in check_nodes:
6313
      _CheckNodeOnline(self.lu, node)
6314

    
6315
    # Check whether disks are valid
6316
    for disk_idx in self.disks:
6317
      self.instance.FindDisk(disk_idx)
6318

    
6319
    # Get secondary node IP addresses
6320
    node_2nd_ip = {}
6321

    
6322
    for node_name in [self.target_node, self.other_node, self.new_node]:
6323
      if node_name is not None:
6324
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6325

    
6326
    self.node_secondary_ip = node_2nd_ip
6327

    
6328
  def Exec(self, feedback_fn):
6329
    """Execute disk replacement.
6330

6331
    This dispatches the disk replacement to the appropriate handler.
6332

6333
    """
6334
    if not self.disks:
6335
      feedback_fn("No disks need replacement")
6336
      return
6337

    
6338
    feedback_fn("Replacing disk(s) %s for %s" %
6339
                (", ".join([str(i) for i in self.disks]), self.instance.name))
6340

    
6341
    activate_disks = (not self.instance.admin_up)
6342

    
6343
    # Activate the instance disks if we're replacing them on a down instance
6344
    if activate_disks:
6345
      _StartInstanceDisks(self.lu, self.instance, True)
6346

    
6347
    try:
6348
      # Should we replace the secondary node?
6349
      if self.new_node is not None:
6350
        return self._ExecDrbd8Secondary()
6351
      else:
6352
        return self._ExecDrbd8DiskOnly()
6353

    
6354
    finally:
6355
      # Deactivate the instance disks if we're replacing them on a down instance
6356
      if activate_disks:
6357
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6358

    
6359
  def _CheckVolumeGroup(self, nodes):
    self.lu.LogInfo("Checking volume groups")

    vgname = self.cfg.GetVGName()

    # Make sure volume group exists on all involved nodes
    results = self.rpc.call_vg_list(nodes)
    if not results:
      raise errors.OpExecError("Can't list volume groups on the nodes")

    for node in nodes:
      res = results[node]
      res.Raise("Error checking node %s" % node)
      if vgname not in res.payload:
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
                                 (vgname, node))
6375

    
6376
  def _CheckDisksExistence(self, nodes):
6377
    # Check disk existence
6378
    for idx, dev in enumerate(self.instance.disks):
6379
      if idx not in self.disks:
6380
        continue
6381

    
6382
      for node in nodes:
6383
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6384
        self.cfg.SetDiskID(dev, node)
6385

    
6386
        result = self.rpc.call_blockdev_find(node, dev)
6387

    
6388
        msg = result.fail_msg
6389
        if msg or not result.payload:
6390
          if not msg:
6391
            msg = "disk not found"
6392
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6393
                                   (idx, node, msg))
6394

    
6395
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6396
    for idx, dev in enumerate(self.instance.disks):
6397
      if idx not in self.disks:
6398
        continue
6399

    
6400
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6401
                      (idx, node_name))
6402

    
6403
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6404
                                   ldisk=ldisk):
6405
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6406
                                 " replace disks for instance %s" %
6407
                                 (node_name, self.instance.name))
6408

    
6409
  def _CreateNewStorage(self, node_name):
6410
    vgname = self.cfg.GetVGName()
6411
    iv_names = {}
6412

    
6413
    for idx, dev in enumerate(self.instance.disks):
6414
      if idx not in self.disks:
6415
        continue
6416

    
6417
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6418

    
6419
      self.cfg.SetDiskID(dev, node_name)
6420

    
6421
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6422
      names = _GenerateUniqueNames(self.lu, lv_names)
6423

    
6424
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6425
                             logical_id=(vgname, names[0]))
6426
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6427
                             logical_id=(vgname, names[1]))
6428

    
6429
      new_lvs = [lv_data, lv_meta]
6430
      old_lvs = dev.children
6431
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
6432

    
6433
      # we pass force_create=True to force the LVM creation
6434
      for new_lv in new_lvs:
6435
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6436
                        _GetInstanceInfoText(self.instance), False)
6437

    
6438
    return iv_names
6439

    
6440
  def _CheckDevices(self, node_name, iv_names):
6441
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
6442
      self.cfg.SetDiskID(dev, node_name)
6443

    
6444
      result = self.rpc.call_blockdev_find(node_name, dev)
6445

    
6446
      msg = result.fail_msg
6447
      if msg or not result.payload:
6448
        if not msg:
6449
          msg = "disk not found"
6450
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
6451
                                 (name, msg))
6452

    
6453
      if result.payload.is_degraded:
6454
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6455

    
6456
  def _RemoveOldStorage(self, node_name, iv_names):
6457
    for name, (dev, old_lvs, _) in iv_names.iteritems():
6458
      self.lu.LogInfo("Remove logical volumes for %s" % name)
6459

    
6460
      for lv in old_lvs:
6461
        self.cfg.SetDiskID(lv, node_name)
6462

    
6463
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6464
        if msg:
6465
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
6466
                             hint="remove unused LVs manually")
6467

    
6468
  def _ExecDrbd8DiskOnly(self):
6469
    """Replace a disk on the primary or secondary for DRBD 8.
6470

6471
    The algorithm for replace is quite complicated:
6472

6473
      1. for each disk to be replaced:
6474

6475
        1. create new LVs on the target node with unique names
6476
        1. detach old LVs from the drbd device
6477
        1. rename old LVs to name_replaced.<time_t>
6478
        1. rename new LVs to old LVs
6479
        1. attach the new LVs (with the old names now) to the drbd device
6480

6481
      1. wait for sync across all devices
6482

6483
      1. for each modified disk:
6484

6485
        1. remove old LVs (which have the name name_replaces.<time_t>)
6486

6487
    Failures are not very well handled.
6488

6489
    """
6490
    steps_total = 6
6491

    
6492
    # Step: check device activation
6493
    self.lu.LogStep(1, steps_total, "Check device existence")
6494
    self._CheckDisksExistence([self.other_node, self.target_node])
6495
    self._CheckVolumeGroup([self.target_node, self.other_node])
6496

    
6497
    # Step: check other node consistency
6498
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6499
    self._CheckDisksConsistency(self.other_node,
6500
                                self.other_node == self.instance.primary_node,
6501
                                False)
6502

    
6503
    # Step: create new storage
6504
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6505
    iv_names = self._CreateNewStorage(self.target_node)
6506

    
6507
    # Step: for each lv, detach+rename*2+attach
6508
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6509
    for dev, old_lvs, new_lvs in iv_names.itervalues():
6510
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6511

    
6512
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
6513
                                                     old_lvs)
6514
      result.Raise("Can't detach drbd from local storage on node"
6515
                   " %s for device %s" % (self.target_node, dev.iv_name))
6516
      #dev.children = []
6517
      #cfg.Update(instance)
6518

    
6519
      # ok, we created the new LVs, so now we know we have the needed
6520
      # storage; as such, we proceed on the target node to rename
6521
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6522
      # using the assumption that logical_id == physical_id (which in
6523
      # turn is the unique_id on that node)
6524

    
6525
      # FIXME(iustin): use a better name for the replaced LVs
6526
      temp_suffix = int(time.time())
6527
      ren_fn = lambda d, suff: (d.physical_id[0],
6528
                                d.physical_id[1] + "_replaced-%s" % suff)
6529

    
6530
      # Build the rename list based on what LVs exist on the node
6531
      rename_old_to_new = []
6532
      for to_ren in old_lvs:
6533
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6534
        if not result.fail_msg and result.payload:
6535
          # device exists
6536
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6537

    
6538
      self.lu.LogInfo("Renaming the old LVs on the target node")
6539
      result = self.rpc.call_blockdev_rename(self.target_node,
6540
                                             rename_old_to_new)
6541
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
6542

    
6543
      # Now we rename the new LVs to the old LVs
6544
      self.lu.LogInfo("Renaming the new LVs on the target node")
6545
      rename_new_to_old = [(new, old.physical_id)
6546
                           for old, new in zip(old_lvs, new_lvs)]
6547
      result = self.rpc.call_blockdev_rename(self.target_node,
6548
                                             rename_new_to_old)
6549
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
6550

    
6551
      for old, new in zip(old_lvs, new_lvs):
6552
        new.logical_id = old.logical_id
6553
        self.cfg.SetDiskID(new, self.target_node)
6554

    
6555
      for disk in old_lvs:
6556
        disk.logical_id = ren_fn(disk, temp_suffix)
6557
        self.cfg.SetDiskID(disk, self.target_node)
6558

    
6559
      # Now that the new lvs have the old name, we can add them to the device
6560
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6561
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
6562
                                                  new_lvs)
6563
      msg = result.fail_msg
6564
      if msg:
6565
        for new_lv in new_lvs:
6566
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
6567
                                               new_lv).fail_msg
6568
          if msg2:
6569
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6570
                               hint=("cleanup manually the unused logical"
6571
                                     "volumes"))
6572
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6573

    
6574
      dev.children = new_lvs
6575

    
6576
      self.cfg.Update(self.instance)
6577

    
6578
    # Wait for sync
6579
    # This can fail as the old devices are degraded and _WaitForSync
6580
    # does a combined result over all disks, so we don't check its return value
6581
    self.lu.LogStep(5, steps_total, "Sync devices")
6582
    _WaitForSync(self.lu, self.instance, unlock=True)
6583

    
6584
    # Check all devices manually
6585
    self._CheckDevices(self.instance.primary_node, iv_names)
6586

    
6587
    # Step: remove old storage
6588
    self.lu.LogStep(6, steps_total, "Removing old storage")
6589
    self._RemoveOldStorage(self.target_node, iv_names)
6590

    
6591
  def _ExecDrbd8Secondary(self):
6592
    """Replace the secondary node for DRBD 8.
6593

6594
    The algorithm for replace is quite complicated:
6595
      - for all disks of the instance:
6596
        - create new LVs on the new node with same names
6597
        - shutdown the drbd device on the old secondary
6598
        - disconnect the drbd network on the primary
6599
        - create the drbd device on the new secondary
6600
        - network attach the drbd on the primary, using an artifice:
          the drbd code for Attach() will connect to the network if it
          finds a device which is connected to the correct local disks but
          not network enabled
6604
      - wait for sync across all devices
6605
      - remove all disks from the old secondary
6606

6607
    Failures are not very well handled.
6608

6609
    """
6610
    steps_total = 6
6611

    
6612
    # Step: check device activation
6613
    self.lu.LogStep(1, steps_total, "Check device existence")
6614
    self._CheckDisksExistence([self.instance.primary_node])
6615
    self._CheckVolumeGroup([self.instance.primary_node])
6616

    
6617
    # Step: check other node consistency
6618
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6619
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
6620

    
6621
    # Step: create new storage
6622
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6623
    for idx, dev in enumerate(self.instance.disks):
6624
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6625
                      (self.new_node, idx))
6626
      # we pass force_create=True to force LVM creation
6627
      for new_lv in dev.children:
6628
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6629
                        _GetInstanceInfoText(self.instance), False)
6630

    
6631
    # Step 4: drbd minors and drbd setup changes
6632
    # after this, we must manually remove the drbd minors on both the
6633
    # error and the success paths
6634
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6635
    minors = self.cfg.AllocateDRBDMinor([self.new_node
6636
                                         for dev in self.instance.disks],
6637
                                        self.instance.name)
6638
    logging.debug("Allocated minors %r" % (minors,))
6639

    
6640
    iv_names = {}
6641
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6642
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
6643
                      (self.new_node, idx))
6644
      # create new devices on new_node; note that we create two IDs:
6645
      # one without port, so the drbd will be activated without
6646
      # networking information on the new node at this stage, and one
6647
      # with network, for the latter activation in step 4
6648
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
6649
      if self.instance.primary_node == o_node1:
6650
        p_minor = o_minor1
6651
      else:
6652
        p_minor = o_minor2
6653

    
6654
      new_alone_id = (self.instance.primary_node, self.new_node, None,
6655
                      p_minor, new_minor, o_secret)
6656
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
6657
                    p_minor, new_minor, o_secret)
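      # Both tuples follow the drbd logical_id layout
      # (node_a, node_b, port, minor_a, minor_b, secret); the "alone" variant
      # simply has no port, so the device comes up without networking first.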
6658

    
6659
      iv_names[idx] = (dev, dev.children, new_net_id)
6660
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6661
                    new_net_id)
6662
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6663
                              logical_id=new_alone_id,
6664
                              children=dev.children,
6665
                              size=dev.size)
6666
      try:
6667
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6668
                              _GetInstanceInfoText(self.instance), False)
6669
      except errors.GenericError:
6670
        self.cfg.ReleaseDRBDMinors(self.instance.name)
6671
        raise
6672

    
6673
    # We have new devices, shutdown the drbd on the old secondary
6674
    for idx, dev in enumerate(self.instance.disks):
6675
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
6676
      self.cfg.SetDiskID(dev, self.target_node)
6677
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
6678
      if msg:
6679
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
6680
                           "node: %s" % (idx, msg),
6681
                           hint=("Please cleanup this device manually as"
6682
                                 " soon as possible"))
6683

    
6684
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6685
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
6686
                                               self.node_secondary_ip,
6687
                                               self.instance.disks)\
6688
                                              [self.instance.primary_node]
6689

    
6690
    msg = result.fail_msg
6691
    if msg:
6692
      # detaches didn't succeed (unlikely)
6693
      self.cfg.ReleaseDRBDMinors(self.instance.name)
6694
      raise errors.OpExecError("Can't detach the disks from the network on"
6695
                               " old node: %s" % (msg,))
6696

    
6697
    # if we managed to detach at least one, we update all the disks of
6698
    # the instance to point to the new secondary
6699
    self.lu.LogInfo("Updating instance configuration")
6700
    for dev, _, new_logical_id in iv_names.itervalues():
6701
      dev.logical_id = new_logical_id
6702
      self.cfg.SetDiskID(dev, self.instance.primary_node)
6703

    
6704
    self.cfg.Update(self.instance)
6705

    
6706
    # and now perform the drbd attach
6707
    self.lu.LogInfo("Attaching primary drbds to new secondary"
6708
                    " (standalone => connected)")
6709
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
6710
                                            self.new_node],
6711
                                           self.node_secondary_ip,
6712
                                           self.instance.disks,
6713
                                           self.instance.name,
6714
                                           False)
6715
    for to_node, to_result in result.items():
6716
      msg = to_result.fail_msg
6717
      if msg:
6718
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
6719
                           to_node, msg,
6720
                           hint=("please do a gnt-instance info to see the"
6721
                                 " status of disks"))
6722

    
6723
    # Wait for sync
6724
    # This can fail as the old devices are degraded and _WaitForSync
6725
    # does a combined result over all disks, so we don't check its return value
6726
    self.lu.LogStep(5, steps_total, "Sync devices")
6727
    _WaitForSync(self.lu, self.instance, unlock=True)
6728

    
6729
    # Check all devices manually
6730
    self._CheckDevices(self.instance.primary_node, iv_names)
6731

    
6732
    # Step: remove old storage
6733
    self.lu.LogStep(6, steps_total, "Removing old storage")
6734
    self._RemoveOldStorage(self.target_node, iv_names)
6735

    
6736

    
6737
class LURepairNodeStorage(NoHooksLU):
  """Repairs the volume group on a node.

  """
  _OP_REQP = ["node_name"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = node_name

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

  def _CheckFaultyDisks(self, instance, node_name):
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
                                node_name, True):
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
                                 " node '%s'" % (instance.name, node_name))

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    if (constants.SO_FIX_CONSISTENCY not in
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " repaired" % storage_type)

    # Check whether any instance on this node has faulty disks
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
      check_nodes = set(inst.all_nodes)
      check_nodes.discard(self.op.node_name)
      for inst_node_name in check_nodes:
        self._CheckFaultyDisks(inst, inst_node_name)

  def Exec(self, feedback_fn):
    feedback_fn("Repairing storage unit '%s' on %s ..." %
                (self.op.name, self.op.node_name))

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_execute(self.op.node_name,
                                           self.op.storage_type, st_args,
                                           self.op.name,
                                           constants.SO_FIX_CONSISTENCY)
    result.Raise("Failed to repair storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))
6791

    
6792

    
6793
class LUGrowDisk(LogicalUnit):
6794
  """Grow a disk of an instance.
6795

6796
  """
6797
  HPATH = "disk-grow"
6798
  HTYPE = constants.HTYPE_INSTANCE
6799
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
6800
  REQ_BGL = False
6801

    
6802
  def ExpandNames(self):
6803
    self._ExpandAndLockInstance()
6804
    self.needed_locks[locking.LEVEL_NODE] = []
6805
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6806

    
6807
  def DeclareLocks(self, level):
6808
    if level == locking.LEVEL_NODE:
6809
      self._LockInstancesNodes()
6810

    
6811
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      ]
    return env, nl, nl
6827

    
6828
  def CheckPrereq(self):
6829
    """Check prerequisites.
6830

6831
    This checks that the instance is in the cluster.
6832

6833
    """
6834
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6835
    assert instance is not None, \
6836
      "Cannot retrieve locked instance %s" % self.op.instance_name
6837
    nodenames = list(instance.all_nodes)
6838
    for node in nodenames:
6839
      _CheckNodeOnline(self, node)
6840

    
6841

    
6842
    self.instance = instance
6843

    
6844
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
6845
      raise errors.OpPrereqError("Instance's disk layout does not support"
6846
                                 " growing.")
6847

    
6848
    self.disk = instance.FindDisk(self.op.disk)
6849

    
6850
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6851
                                       instance.hypervisor)
6852
    for node in nodenames:
6853
      info = nodeinfo[node]
6854
      info.Raise("Cannot get current information from node %s" % node)
6855
      vg_free = info.payload.get('vg_free', None)
6856
      if not isinstance(vg_free, int):
6857
        raise errors.OpPrereqError("Can't compute free disk space on"
6858
                                   " node %s" % node)
6859
      if self.op.amount > vg_free:
6860
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
6861
                                   " %d MiB available, %d MiB required" %
6862
                                   (node, vg_free, self.op.amount))
6863

    
6864
  def Exec(self, feedback_fn):
6865
    """Execute disk grow.
6866

6867
    """
6868
    instance = self.instance
6869
    disk = self.disk
6870
    for node in instance.all_nodes:
6871
      self.cfg.SetDiskID(disk, node)
6872
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
6873
      result.Raise("Grow request failed to node %s" % node)
6874
    disk.RecordGrow(self.op.amount)
6875
    self.cfg.Update(instance)
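    # The amount is in MiB; e.g. a "gnt-instance grow-disk inst1.example.com
    # 0 1024" request grows disk 0 by 1 GiB on every node holding it (command
    # shown for illustration only).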
6876
    if self.op.wait_for_sync:
6877
      disk_abort = not _WaitForSync(self, instance)
6878
      if disk_abort:
6879
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
6880
                             " status.\nPlease check the instance.")
6881

    
6882

    
6883
class LUQueryInstanceData(NoHooksLU):
6884
  """Query runtime instance data.
6885

6886
  """
6887
  _OP_REQP = ["instances", "static"]
6888
  REQ_BGL = False
6889

    
6890
  def ExpandNames(self):
6891
    self.needed_locks = {}
6892
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6893

    
6894
    if not isinstance(self.op.instances, list):
6895
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6896

    
6897
    if self.op.instances:
6898
      self.wanted_names = []
6899
      for name in self.op.instances:
6900
        full_name = self.cfg.ExpandInstanceName(name)
6901
        if full_name is None:
6902
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6903
        self.wanted_names.append(full_name)
6904
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6905
    else:
6906
      self.wanted_names = None
6907
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6908

    
6909
    self.needed_locks[locking.LEVEL_NODE] = []
6910
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6911

    
6912
  def DeclareLocks(self, level):
6913
    if level == locking.LEVEL_NODE:
6914
      self._LockInstancesNodes()
6915

    
6916
  def CheckPrereq(self):
6917
    """Check prerequisites.
6918

6919
    This only checks the optional instance list against the existing names.
6920

6921
    """
6922
    if self.wanted_names is None:
6923
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6924

    
6925
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6926
                             in self.wanted_names]
6927
    return
6928

    
6929
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
6930
    """Returns the status of a block device
6931

6932
    """
6933
    if self.op.static or not node:
6934
      return None
6935

    
6936
    self.cfg.SetDiskID(dev, node)
6937

    
6938
    result = self.rpc.call_blockdev_find(node, dev)
6939
    if result.offline:
6940
      return None
6941

    
6942
    result.Raise("Can't compute disk status for %s" % instance_name)
6943

    
6944
    status = result.payload
6945
    if status is None:
6946
      return None
6947

    
6948
    return (status.dev_path, status.major, status.minor,
6949
            status.sync_percent, status.estimated_time,
6950
            status.is_degraded, status.ldisk_status)
6951

    
6952
  def _ComputeDiskStatus(self, instance, snode, dev):
6953
    """Compute block device status.
6954

6955
    """
6956
    if dev.dev_type in constants.LDS_DRBD:
6957
      # we change the snode then (otherwise we use the one passed in)
6958
      if dev.logical_id[0] == instance.primary_node:
6959
        snode = dev.logical_id[1]
6960
      else:
6961
        snode = dev.logical_id[0]
6962

    
6963
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
6964
                                              instance.name, dev)
6965
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
6966

    
6967
    if dev.children:
6968
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
6969
                      for child in dev.children]
6970
    else:
6971
      dev_children = []
6972

    
6973
    data = {
6974
      "iv_name": dev.iv_name,
6975
      "dev_type": dev.dev_type,
6976
      "logical_id": dev.logical_id,
6977
      "physical_id": dev.physical_id,
6978
      "pstatus": dev_pstatus,
6979
      "sstatus": dev_sstatus,
6980
      "children": dev_children,
6981
      "mode": dev.mode,
6982
      "size": dev.size,
6983
      }
6984

    
6985
    return data
6986

    
6987
  def Exec(self, feedback_fn):
6988
    """Gather and return data"""
6989
    result = {}
6990

    
6991
    cluster = self.cfg.GetClusterInfo()
6992

    
6993
    for instance in self.wanted_instances:
6994
      if not self.op.static:
6995
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6996
                                                  instance.name,
6997
                                                  instance.hypervisor)
6998
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6999
        remote_info = remote_info.payload
7000
        if remote_info and "state" in remote_info:
7001
          remote_state = "up"
7002
        else:
7003
          remote_state = "down"
7004
      else:
7005
        remote_state = None
7006
      if instance.admin_up:
7007
        config_state = "up"
7008
      else:
7009
        config_state = "down"
7010

    
7011
      disks = [self._ComputeDiskStatus(instance, None, device)
7012
               for device in instance.disks]
7013

    
7014
      idict = {
7015
        "name": instance.name,
7016
        "config_state": config_state,
7017
        "run_state": remote_state,
7018
        "pnode": instance.primary_node,
7019
        "snodes": instance.secondary_nodes,
7020
        "os": instance.os,
7021
        # this happens to be the same format used for hooks
7022
        "nics": _NICListToTuple(self, instance.nics),
7023
        "disks": disks,
7024
        "hypervisor": instance.hypervisor,
7025
        "network_port": instance.network_port,
7026
        "hv_instance": instance.hvparams,
7027
        "hv_actual": cluster.FillHV(instance),
7028
        "be_instance": instance.beparams,
7029
        "be_actual": cluster.FillBE(instance),
7030
        "serial_no": instance.serial_no,
7031
        "mtime": instance.mtime,
7032
        "ctime": instance.ctime,
7033
        }
7034

    
7035
      result[instance.name] = idict
7036

    
7037
    return result
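    # Illustrative sketch (not part of the original code): the returned
    # mapping is keyed by instance name, e.g.
    #   {"inst1.example.com": {"name": "inst1.example.com",
    #                          "config_state": "up", "run_state": "down",
    #                          "pnode": "node1.example.com", ...}}
    # with one "idict", as built above, per queried instance.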
7038

    
7039

    
7040
class LUSetInstanceParams(LogicalUnit):
7041
  """Modifies an instances's parameters.
7042

7043
  """
7044
  HPATH = "instance-modify"
7045
  HTYPE = constants.HTYPE_INSTANCE
7046
  _OP_REQP = ["instance_name"]
7047
  REQ_BGL = False
7048

    
7049
  def CheckArguments(self):
7050
    if not hasattr(self.op, 'nics'):
7051
      self.op.nics = []
7052
    if not hasattr(self.op, 'disks'):
7053
      self.op.disks = []
7054
    if not hasattr(self.op, 'beparams'):
7055
      self.op.beparams = {}
7056
    if not hasattr(self.op, 'hvparams'):
7057
      self.op.hvparams = {}
7058
    self.op.force = getattr(self.op, "force", False)
7059
    if not (self.op.nics or self.op.disks or
7060
            self.op.hvparams or self.op.beparams):
7061
      raise errors.OpPrereqError("No changes submitted")
7062

    
7063
    # Disk validation
7064
    disk_addremove = 0
7065
    for disk_op, disk_dict in self.op.disks:
7066
      if disk_op == constants.DDM_REMOVE:
7067
        disk_addremove += 1
7068
        continue
7069
      elif disk_op == constants.DDM_ADD:
7070
        disk_addremove += 1
7071
      else:
7072
        if not isinstance(disk_op, int):
7073
          raise errors.OpPrereqError("Invalid disk index")
7074
        if not isinstance(disk_dict, dict):
7075
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7076
          raise errors.OpPrereqError(msg)
7077

    
7078
      if disk_op == constants.DDM_ADD:
7079
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7080
        if mode not in constants.DISK_ACCESS_SET:
7081
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
7082
        size = disk_dict.get('size', None)
7083
        if size is None:
7084
          raise errors.OpPrereqError("Required disk parameter size missing")
7085
        try:
7086
          size = int(size)
7087
        except ValueError, err:
7088
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7089
                                     str(err))
7090
        disk_dict['size'] = size
7091
      else:
7092
        # modification of disk
7093
        if 'size' in disk_dict:
7094
          raise errors.OpPrereqError("Disk size change not possible, use"
7095
                                     " grow-disk")
7096

    
7097
    if disk_addremove > 1:
7098
      raise errors.OpPrereqError("Only one disk add or remove operation"
7099
                                 " supported at a time")
7100

    
7101
    # NIC validation
7102
    nic_addremove = 0
7103
    for nic_op, nic_dict in self.op.nics:
7104
      if nic_op == constants.DDM_REMOVE:
7105
        nic_addremove += 1
7106
        continue
7107
      elif nic_op == constants.DDM_ADD:
7108
        nic_addremove += 1
7109
      else:
7110
        if not isinstance(nic_op, int):
7111
          raise errors.OpPrereqError("Invalid nic index")
7112
        if not isinstance(nic_dict, dict):
7113
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7114
          raise errors.OpPrereqError(msg)
7115

    
7116
      # nic_dict should be a dict
7117
      nic_ip = nic_dict.get('ip', None)
7118
      if nic_ip is not None:
7119
        if nic_ip.lower() == constants.VALUE_NONE:
7120
          nic_dict['ip'] = None
7121
        else:
7122
          if not utils.IsValidIP(nic_ip):
7123
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
7124

    
7125
      nic_bridge = nic_dict.get('bridge', None)
7126
      nic_link = nic_dict.get('link', None)
7127
      if nic_bridge and nic_link:
7128
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7129
                                   " at the same time")
7130
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7131
        nic_dict['bridge'] = None
7132
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7133
        nic_dict['link'] = None
7134

    
7135
      if nic_op == constants.DDM_ADD:
7136
        nic_mac = nic_dict.get('mac', None)
7137
        if nic_mac is None:
7138
          nic_dict['mac'] = constants.VALUE_AUTO
7139

    
7140
      if 'mac' in nic_dict:
7141
        nic_mac = nic_dict['mac']
7142
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7143
          if not utils.IsValidMac(nic_mac):
7144
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
7145
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7146
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7147
                                     " modifying an existing nic")
7148

    
7149
    if nic_addremove > 1:
7150
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7151
                                 " supported at a time")
7152

    
7153
  def ExpandNames(self):
7154
    self._ExpandAndLockInstance()
7155
    self.needed_locks[locking.LEVEL_NODE] = []
7156
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7157

    
7158
  def DeclareLocks(self, level):
7159
    if level == locking.LEVEL_NODE:
7160
      self._LockInstancesNodes()
7161

    
7162
  def BuildHooksEnv(self):
7163
    """Build hooks env.
7164

7165
    This runs on the master, primary and secondaries.
7166

7167
    """
7168
    args = dict()
7169
    if constants.BE_MEMORY in self.be_new:
7170
      args['memory'] = self.be_new[constants.BE_MEMORY]
7171
    if constants.BE_VCPUS in self.be_new:
7172
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
7173
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7174
    # information at all.
7175
    if self.op.nics:
7176
      args['nics'] = []
7177
      nic_override = dict(self.op.nics)
7178
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7179
      for idx, nic in enumerate(self.instance.nics):
7180
        if idx in nic_override:
7181
          this_nic_override = nic_override[idx]
7182
        else:
7183
          this_nic_override = {}
7184
        if 'ip' in this_nic_override:
7185
          ip = this_nic_override['ip']
7186
        else:
7187
          ip = nic.ip
7188
        if 'mac' in this_nic_override:
7189
          mac = this_nic_override['mac']
7190
        else:
7191
          mac = nic.mac
7192
        if idx in self.nic_pnew:
7193
          nicparams = self.nic_pnew[idx]
7194
        else:
7195
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7196
        mode = nicparams[constants.NIC_MODE]
7197
        link = nicparams[constants.NIC_LINK]
7198
        args['nics'].append((ip, mac, mode, link))
7199
      if constants.DDM_ADD in nic_override:
7200
        ip = nic_override[constants.DDM_ADD].get('ip', None)
7201
        mac = nic_override[constants.DDM_ADD]['mac']
7202
        nicparams = self.nic_pnew[constants.DDM_ADD]
7203
        mode = nicparams[constants.NIC_MODE]
7204
        link = nicparams[constants.NIC_LINK]
7205
        args['nics'].append((ip, mac, mode, link))
7206
      elif constants.DDM_REMOVE in nic_override:
7207
        del args['nics'][-1]
7208

    
7209
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7210
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7211
    return env, nl, nl
7212

    
7213
  def _GetUpdatedParams(self, old_params, update_dict,
7214
                        default_values, parameter_types):
7215
    """Return the new params dict for the given params.
7216

7217
    @type old_params: dict
7218
    @param old_params: old parameters
7219
    @type update_dict: dict
7220
    @param update_dict: dict containing new parameter values,
7221
                        or constants.VALUE_DEFAULT to reset the
7222
                        parameter to its default value
7223
    @type default_values: dict
7224
    @param default_values: default values for the filled parameters
7225
    @type parameter_types: dict
7226
    @param parameter_types: dict mapping target dict keys to types
7227
                            in constants.ENFORCEABLE_TYPES
7228
    @rtype: (dict, dict)
7229
    @return: (new_parameters, filled_parameters)
7230

7231
    """
7232
    params_copy = copy.deepcopy(old_params)
7233
    for key, val in update_dict.iteritems():
7234
      if val == constants.VALUE_DEFAULT:
7235
        try:
7236
          del params_copy[key]
7237
        except KeyError:
7238
          pass
7239
      else:
7240
        params_copy[key] = val
7241
    utils.ForceDictType(params_copy, parameter_types)
7242
    params_filled = objects.FillDict(default_values, params_copy)
7243
    return (params_copy, params_filled)
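    # Worked example (illustrative, not part of the original code):
    #   old_params     = {"memory": 512}
    #   update_dict    = {"memory": constants.VALUE_DEFAULT, "vcpus": 2}
    #   default_values = {"memory": 128, "vcpus": 1}
    # yields params_copy == {"vcpus": 2} (only the explicit overrides) and
    # params_filled == {"memory": 128, "vcpus": 2} (defaults plus overrides).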
7244

    
7245
  def CheckPrereq(self):
7246
    """Check prerequisites.
7247

7248
    This checks the requested changes against the instance and cluster state.
7249

7250
    """
7251
    self.force = self.op.force
7252

    
7253
    # checking the new params on the primary/secondary nodes
7254

    
7255
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7256
    cluster = self.cluster = self.cfg.GetClusterInfo()
7257
    assert self.instance is not None, \
7258
      "Cannot retrieve locked instance %s" % self.op.instance_name
7259
    pnode = instance.primary_node
7260
    nodelist = list(instance.all_nodes)
7261

    
7262
    # hvparams processing
7263
    if self.op.hvparams:
7264
      i_hvdict, hv_new = self._GetUpdatedParams(
7265
                             instance.hvparams, self.op.hvparams,
7266
                             cluster.hvparams[instance.hypervisor],
7267
                             constants.HVS_PARAMETER_TYPES)
7268
      # local check
7269
      hypervisor.GetHypervisor(
7270
        instance.hypervisor).CheckParameterSyntax(hv_new)
7271
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7272
      self.hv_new = hv_new # the new actual values
7273
      self.hv_inst = i_hvdict # the new dict (without defaults)
7274
    else:
7275
      self.hv_new = self.hv_inst = {}
7276

    
7277
    # beparams processing
7278
    if self.op.beparams:
7279
      i_bedict, be_new = self._GetUpdatedParams(
7280
                             instance.beparams, self.op.beparams,
7281
                             cluster.beparams[constants.PP_DEFAULT],
7282
                             constants.BES_PARAMETER_TYPES)
7283
      self.be_new = be_new # the new actual values
7284
      self.be_inst = i_bedict # the new dict (without defaults)
7285
    else:
7286
      self.be_new = self.be_inst = {}
7287

    
7288
    self.warn = []
7289

    
7290
    if constants.BE_MEMORY in self.op.beparams and not self.force:
7291
      mem_check_list = [pnode]
7292
      if be_new[constants.BE_AUTO_BALANCE]:
7293
        # either we changed auto_balance to yes or it was from before
7294
        mem_check_list.extend(instance.secondary_nodes)
7295
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
7296
                                                  instance.hypervisor)
7297
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7298
                                         instance.hypervisor)
7299
      pninfo = nodeinfo[pnode]
7300
      msg = pninfo.fail_msg
7301
      if msg:
7302
        # Assume the primary node is unreachable and go ahead
7303
        self.warn.append("Can't get info from primary node %s: %s" %
7304
                         (pnode,  msg))
7305
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
7306
        self.warn.append("Node data from primary node %s doesn't contain"
7307
                         " free memory information" % pnode)
7308
      elif instance_info.fail_msg:
7309
        self.warn.append("Can't get instance runtime information: %s" %
7310
                        instance_info.fail_msg)
7311
      else:
7312
        if instance_info.payload:
7313
          current_mem = int(instance_info.payload['memory'])
7314
        else:
7315
          # Assume instance not running
7316
          # (there is a slight race condition here, but it's not very probable,
7317
          # and we have no other way to check)
7318
          current_mem = 0
7319
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7320
                    pninfo.payload['memory_free'])
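        # Illustrative arithmetic (not in the original code): asking for
        # 2048 MB when the instance currently uses 512 MB and the primary node
        # reports 1024 MB free gives miss_mem = 2048 - 512 - 1024 = 512 > 0,
        # so the change is refused (the whole check is skipped when
        # self.op.force is set).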
7321
        if miss_mem > 0:
7322
          raise errors.OpPrereqError("This change will prevent the instance"
7323
                                     " from starting, due to %d MB of memory"
7324
                                     " missing on its primary node" % miss_mem)
7325

    
7326
      if be_new[constants.BE_AUTO_BALANCE]:
7327
        for node, nres in nodeinfo.items():
7328
          if node not in instance.secondary_nodes:
7329
            continue
7330
          msg = nres.fail_msg
7331
          if msg:
7332
            self.warn.append("Can't get info from secondary node %s: %s" %
7333
                             (node, msg))
7334
          elif not isinstance(nres.payload.get('memory_free', None), int):
7335
            self.warn.append("Secondary node %s didn't return free"
7336
                             " memory information" % node)
7337
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7338
            self.warn.append("Not enough memory to failover instance to"
7339
                             " secondary node %s" % node)
7340

    
7341
    # NIC processing
7342
    self.nic_pnew = {}
7343
    self.nic_pinst = {}
7344
    for nic_op, nic_dict in self.op.nics:
7345
      if nic_op == constants.DDM_REMOVE:
7346
        if not instance.nics:
7347
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
7348
        continue
7349
      if nic_op != constants.DDM_ADD:
7350
        # an existing nic
7351
        if nic_op < 0 or nic_op >= len(instance.nics):
7352
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7353
                                     " are 0 to %d" %
7354
                                     (nic_op, len(instance.nics)))
7355
        old_nic_params = instance.nics[nic_op].nicparams
7356
        old_nic_ip = instance.nics[nic_op].ip
7357
      else:
7358
        old_nic_params = {}
7359
        old_nic_ip = None
7360

    
7361
      update_params_dict = dict([(key, nic_dict[key])
7362
                                 for key in constants.NICS_PARAMETERS
7363
                                 if key in nic_dict])
7364

    
7365
      if 'bridge' in nic_dict:
7366
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7367

    
7368
      new_nic_params, new_filled_nic_params = \
7369
          self._GetUpdatedParams(old_nic_params, update_params_dict,
7370
                                 cluster.nicparams[constants.PP_DEFAULT],
7371
                                 constants.NICS_PARAMETER_TYPES)
7372
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7373
      self.nic_pinst[nic_op] = new_nic_params
7374
      self.nic_pnew[nic_op] = new_filled_nic_params
7375
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7376

    
7377
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
7378
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7379
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7380
        if msg:
7381
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7382
          if self.force:
7383
            self.warn.append(msg)
7384
          else:
7385
            raise errors.OpPrereqError(msg)
7386
      if new_nic_mode == constants.NIC_MODE_ROUTED:
7387
        if 'ip' in nic_dict:
7388
          nic_ip = nic_dict['ip']
7389
        else:
7390
          nic_ip = old_nic_ip
7391
        if nic_ip is None:
7392
          raise errors.OpPrereqError('Cannot set the nic ip to None'
7393
                                     ' on a routed nic')
7394
      if 'mac' in nic_dict:
7395
        nic_mac = nic_dict['mac']
7396
        if nic_mac is None:
7397
          raise errors.OpPrereqError('Cannot set the nic mac to None')
7398
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7399
          # otherwise generate the mac
7400
          nic_dict['mac'] = self.cfg.GenerateMAC()
7401
        else:
7402
          # or validate/reserve the current one
7403
          if self.cfg.IsMacInUse(nic_mac):
7404
            raise errors.OpPrereqError("MAC address %s already in use"
7405
                                       " in cluster" % nic_mac)
7406

    
7407
    # DISK processing
7408
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7409
      raise errors.OpPrereqError("Disk operations not supported for"
7410
                                 " diskless instances")
7411
    for disk_op, disk_dict in self.op.disks:
7412
      if disk_op == constants.DDM_REMOVE:
7413
        if len(instance.disks) == 1:
7414
          raise errors.OpPrereqError("Cannot remove the last disk of"
7415
                                     " an instance")
7416
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7417
        ins_l = ins_l[pnode]
7418
        msg = ins_l.fail_msg
7419
        if msg:
7420
          raise errors.OpPrereqError("Can't contact node %s: %s" %
7421
                                     (pnode, msg))
7422
        if instance.name in ins_l.payload:
7423
          raise errors.OpPrereqError("Instance is running, can't remove"
7424
                                     " disks.")
7425

    
7426
      if (disk_op == constants.DDM_ADD and
7427
          len(instance.disks) >= constants.MAX_DISKS):
7428
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7429
                                   " add more" % constants.MAX_DISKS)
7430
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7431
        # an existing disk
7432
        if disk_op < 0 or disk_op >= len(instance.disks):
7433
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
7434
                                     " are 0 to %d" %
7435
                                     (disk_op, len(instance.disks)))
7436

    
7437
    return
7438

    
7439
  def Exec(self, feedback_fn):
7440
    """Modifies an instance.
7441

7442
    All parameters take effect only at the next restart of the instance.
7443

7444
    """
7445
    # Process here the warnings from CheckPrereq, as we don't have a
7446
    # feedback_fn there.
7447
    for warn in self.warn:
7448
      feedback_fn("WARNING: %s" % warn)
7449

    
7450
    result = []
7451
    instance = self.instance
7452
    cluster = self.cluster
7453
    # disk changes
7454
    for disk_op, disk_dict in self.op.disks:
7455
      if disk_op == constants.DDM_REMOVE:
7456
        # remove the last disk
7457
        device = instance.disks.pop()
7458
        device_idx = len(instance.disks)
7459
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7460
          self.cfg.SetDiskID(disk, node)
7461
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7462
          if msg:
7463
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7464
                            " continuing anyway", device_idx, node, msg)
7465
        result.append(("disk/%d" % device_idx, "remove"))
7466
      elif disk_op == constants.DDM_ADD:
7467
        # add a new disk
7468
        if instance.disk_template == constants.DT_FILE:
7469
          file_driver, file_path = instance.disks[0].logical_id
7470
          file_path = os.path.dirname(file_path)
7471
        else:
7472
          file_driver = file_path = None
7473
        disk_idx_base = len(instance.disks)
7474
        new_disk = _GenerateDiskTemplate(self,
7475
                                         instance.disk_template,
7476
                                         instance.name, instance.primary_node,
7477
                                         instance.secondary_nodes,
7478
                                         [disk_dict],
7479
                                         file_path,
7480
                                         file_driver,
7481
                                         disk_idx_base)[0]
7482
        instance.disks.append(new_disk)
7483
        info = _GetInstanceInfoText(instance)
7484

    
7485
        logging.info("Creating volume %s for instance %s",
7486
                     new_disk.iv_name, instance.name)
7487
        # Note: this needs to be kept in sync with _CreateDisks
7488
        #HARDCODE
7489
        for node in instance.all_nodes:
7490
          f_create = node == instance.primary_node
7491
          try:
7492
            _CreateBlockDev(self, node, instance, new_disk,
7493
                            f_create, info, f_create)
7494
          except errors.OpExecError, err:
7495
            self.LogWarning("Failed to create volume %s (%s) on"
7496
                            " node %s: %s",
7497
                            new_disk.iv_name, new_disk, node, err)
7498
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7499
                       (new_disk.size, new_disk.mode)))
7500
      else:
7501
        # change a given disk
7502
        instance.disks[disk_op].mode = disk_dict['mode']
7503
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7504
    # NIC changes
7505
    for nic_op, nic_dict in self.op.nics:
7506
      if nic_op == constants.DDM_REMOVE:
7507
        # remove the last nic
7508
        del instance.nics[-1]
7509
        result.append(("nic.%d" % len(instance.nics), "remove"))
7510
      elif nic_op == constants.DDM_ADD:
7511
        # mac and bridge should be set by now
7512
        mac = nic_dict['mac']
7513
        ip = nic_dict.get('ip', None)
7514
        nicparams = self.nic_pinst[constants.DDM_ADD]
7515
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7516
        instance.nics.append(new_nic)
7517
        result.append(("nic.%d" % (len(instance.nics) - 1),
7518
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7519
                       (new_nic.mac, new_nic.ip,
7520
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7521
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7522
                       )))
7523
      else:
7524
        for key in 'mac', 'ip':
7525
          if key in nic_dict:
7526
            setattr(instance.nics[nic_op], key, nic_dict[key])
7527
        if nic_op in self.nic_pnew:
7528
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
7529
        for key, val in nic_dict.iteritems():
7530
          result.append(("nic.%s/%d" % (key, nic_op), val))
7531

    
7532
    # hvparams changes
7533
    if self.op.hvparams:
7534
      instance.hvparams = self.hv_inst
7535
      for key, val in self.op.hvparams.iteritems():
7536
        result.append(("hv/%s" % key, val))
7537

    
7538
    # beparams changes
7539
    if self.op.beparams:
7540
      instance.beparams = self.be_inst
7541
      for key, val in self.op.beparams.iteritems():
7542
        result.append(("be/%s" % key, val))
7543

    
7544
    self.cfg.Update(instance)
7545

    
7546
    return result
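    # Illustrative sketch (not part of the original code): the returned list
    # pairs each changed item with its new value/action, e.g.
    #   [("disk/1", "add:size=1024,mode=rw"),
    #    ("nic.ip/0", "192.0.2.10"),
    #    ("be/memory", 2048)]
    # which the caller can present to the user as a change summary.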
7547

    
7548

    
7549
class LUQueryExports(NoHooksLU):
7550
  """Query the exports list
7551

7552
  """
7553
  _OP_REQP = ['nodes']
7554
  REQ_BGL = False
7555

    
7556
  def ExpandNames(self):
7557
    self.needed_locks = {}
7558
    self.share_locks[locking.LEVEL_NODE] = 1
7559
    if not self.op.nodes:
7560
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7561
    else:
7562
      self.needed_locks[locking.LEVEL_NODE] = \
7563
        _GetWantedNodes(self, self.op.nodes)
7564

    
7565
  def CheckPrereq(self):
7566
    """Check prerequisites.
7567

7568
    """
7569
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
7570

    
7571
  def Exec(self, feedback_fn):
7572
    """Compute the list of all the exported system images.
7573

7574
    @rtype: dict
7575
    @return: a dictionary with the structure node->(export-list)
7576
        where export-list is a list of the instances exported on
7577
        that node.
7578

7579
    """
7580
    rpcresult = self.rpc.call_export_list(self.nodes)
7581
    result = {}
7582
    for node in rpcresult:
7583
      if rpcresult[node].fail_msg:
7584
        result[node] = False
7585
      else:
7586
        result[node] = rpcresult[node].payload
7587

    
7588
    return result
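    # Example result (illustrative, not in the original code):
    #   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
    #    "node2.example.com": False}
    # where False marks a node whose export list could not be retrieved.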
7589

    
7590

    
7591
class LUExportInstance(LogicalUnit):
7592
  """Export an instance to an image in the cluster.
7593

7594
  """
7595
  HPATH = "instance-export"
7596
  HTYPE = constants.HTYPE_INSTANCE
7597
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
7598
  REQ_BGL = False
7599

    
7600
  def ExpandNames(self):
7601
    self._ExpandAndLockInstance()
7602
    # FIXME: lock only instance primary and destination node
7603
    #
7604
    # Sad but true, for now we have to lock all nodes, as we don't know where
7605
    # the previous export might be, and in this LU we search for it and
7606
    # remove it from its current node. In the future we could fix this by:
7607
    #  - making a tasklet to search (share-lock all), then create the new one,
7608
    #    then one to remove, after
7609
    #  - removing the removal operation altogether
7610
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7611

    
7612
  def DeclareLocks(self, level):
7613
    """Last minute lock declaration."""
7614
    # All nodes are locked anyway, so nothing to do here.
7615

    
7616
  def BuildHooksEnv(self):
7617
    """Build hooks env.
7618

7619
    This will run on the master, primary node and target node.
7620

7621
    """
7622
    env = {
7623
      "EXPORT_NODE": self.op.target_node,
7624
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
7625
      }
7626
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7627
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
7628
          self.op.target_node]
7629
    return env, nl, nl
7630

    
7631
  def CheckPrereq(self):
7632
    """Check prerequisites.
7633

7634
    This checks that the instance and node names are valid.
7635

7636
    """
7637
    instance_name = self.op.instance_name
7638
    self.instance = self.cfg.GetInstanceInfo(instance_name)
7639
    assert self.instance is not None, \
7640
          "Cannot retrieve locked instance %s" % self.op.instance_name
7641
    _CheckNodeOnline(self, self.instance.primary_node)
7642

    
7643
    self.dst_node = self.cfg.GetNodeInfo(
7644
      self.cfg.ExpandNodeName(self.op.target_node))
7645

    
7646
    if self.dst_node is None:
7647
      # This is a wrong node name, not a non-locked node
7648
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
7649
    _CheckNodeOnline(self, self.dst_node.name)
7650
    _CheckNodeNotDrained(self, self.dst_node.name)
7651

    
7652
    # instance disk type verification
7653
    for disk in self.instance.disks:
7654
      if disk.dev_type == constants.LD_FILE:
7655
        raise errors.OpPrereqError("Export not supported for instances with"
7656
                                   " file-based disks")
7657

    
7658
  def Exec(self, feedback_fn):
7659
    """Export an instance to an image in the cluster.
7660

7661
    """
7662
    instance = self.instance
7663
    dst_node = self.dst_node
7664
    src_node = instance.primary_node
7665

    
7666
    if self.op.shutdown:
7667
      # shutdown the instance, but not the disks
7668
      feedback_fn("Shutting down instance %s" % instance.name)
7669
      result = self.rpc.call_instance_shutdown(src_node, instance)
7670
      result.Raise("Could not shutdown instance %s on"
7671
                   " node %s" % (instance.name, src_node))
7672

    
7673
    vgname = self.cfg.GetVGName()
7674

    
7675
    snap_disks = []
7676

    
7677
    # set the disks ID correctly since call_instance_start needs the
7678
    # correct drbd minor to create the symlinks
7679
    for disk in instance.disks:
7680
      self.cfg.SetDiskID(disk, src_node)
7681

    
7682
    # per-disk results
7683
    dresults = []
7684
    try:
7685
      for idx, disk in enumerate(instance.disks):
7686
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
7687
                    (idx, src_node))
7688

    
7689
        # result.payload will be a snapshot of an lvm leaf of the one we passed
7690
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
7691
        msg = result.fail_msg
7692
        if msg:
7693
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
7694
                          idx, src_node, msg)
7695
          snap_disks.append(False)
7696
        else:
7697
          disk_id = (vgname, result.payload)
7698
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
7699
                                 logical_id=disk_id, physical_id=disk_id,
7700
                                 iv_name=disk.iv_name)
7701
          snap_disks.append(new_dev)
7702

    
7703
    finally:
7704
      if self.op.shutdown and instance.admin_up:
7705
        feedback_fn("Starting instance %s" % instance.name)
7706
        result = self.rpc.call_instance_start(src_node, instance, None, None)
7707
        msg = result.fail_msg
7708
        if msg:
7709
          _ShutdownInstanceDisks(self, instance)
7710
          raise errors.OpExecError("Could not start instance: %s" % msg)
7711

    
7712
    # TODO: check for size
7713

    
7714
    cluster_name = self.cfg.GetClusterName()
7715
    for idx, dev in enumerate(snap_disks):
7716
      feedback_fn("Exporting snapshot %s from %s to %s" %
7717
                  (idx, src_node, dst_node.name))
7718
      if dev:
7719
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
7720
                                               instance, cluster_name, idx)
7721
        msg = result.fail_msg
7722
        if msg:
7723
          self.LogWarning("Could not export disk/%s from node %s to"
7724
                          " node %s: %s", idx, src_node, dst_node.name, msg)
7725
          dresults.append(False)
7726
        else:
7727
          dresults.append(True)
7728
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
7729
        if msg:
7730
          self.LogWarning("Could not remove snapshot for disk/%d from node"
7731
                          " %s: %s", idx, src_node, msg)
7732
      else:
7733
        dresults.append(False)
7734

    
7735
    feedback_fn("Finalizing export on %s" % dst_node.name)
7736
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
7737
    fin_resu = True
7738
    msg = result.fail_msg
7739
    if msg:
7740
      self.LogWarning("Could not finalize export for instance %s"
7741
                      " on node %s: %s", instance.name, dst_node.name, msg)
7742
      fin_resu = False
7743

    
7744
    nodelist = self.cfg.GetNodeList()
7745
    nodelist.remove(dst_node.name)
7746

    
7747
    # on one-node clusters nodelist will be empty after the removal
7748
    # if we proceed, the backup would be removed because OpQueryExports
7749
    # substitutes an empty list with the full cluster node list.
7750
    iname = instance.name
7751
    if nodelist:
7752
      feedback_fn("Removing old exports for instance %s" % iname)
7753
      exportlist = self.rpc.call_export_list(nodelist)
7754
      for node in exportlist:
7755
        if exportlist[node].fail_msg:
7756
          continue
7757
        if iname in exportlist[node].payload:
7758
          msg = self.rpc.call_export_remove(node, iname).fail_msg
7759
          if msg:
7760
            self.LogWarning("Could not remove older export for instance %s"
7761
                            " on node %s: %s", iname, node, msg)
7762
    return fin_resu, dresults
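    # Note (illustrative, not in the original code): the return value is a
    # pair (finalize_ok, per_disk_ok), e.g. (True, [True, False]) when the
    # export was finalized but the second disk failed to transfer.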
7763

    
7764

    
7765
class LURemoveExport(NoHooksLU):
7766
  """Remove exports related to the named instance.
7767

7768
  """
7769
  _OP_REQP = ["instance_name"]
7770
  REQ_BGL = False
7771

    
7772
  def ExpandNames(self):
7773
    self.needed_locks = {}
7774
    # We need all nodes to be locked in order for RemoveExport to work, but we
7775
    # don't need to lock the instance itself, as nothing will happen to it (and
7776
    # we can remove exports also for a removed instance)
7777
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7778

    
7779
  def CheckPrereq(self):
7780
    """Check prerequisites.
7781
    """
7782
    pass
7783

    
7784
  def Exec(self, feedback_fn):
7785
    """Remove any export.
7786

7787
    """
7788
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
7789
    # If the instance was not found we'll try with the name that was passed in.
7790
    # This will only work if it was an FQDN, though.
7791
    fqdn_warn = False
7792
    if not instance_name:
7793
      fqdn_warn = True
7794
      instance_name = self.op.instance_name
7795

    
7796
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7797
    exportlist = self.rpc.call_export_list(locked_nodes)
7798
    found = False
7799
    for node in exportlist:
7800
      msg = exportlist[node].fail_msg
7801
      if msg:
7802
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
7803
        continue
7804
      if instance_name in exportlist[node].payload:
7805
        found = True
7806
        result = self.rpc.call_export_remove(node, instance_name)
7807
        msg = result.fail_msg
7808
        if msg:
7809
          logging.error("Could not remove export for instance %s"
7810
                        " on node %s: %s", instance_name, node, msg)
7811

    
7812
    if fqdn_warn and not found:
7813
      feedback_fn("Export not found. If trying to remove an export belonging"
7814
                  " to a deleted instance please use its Fully Qualified"
7815
                  " Domain Name.")
7816

    
7817

    
7818
class TagsLU(NoHooksLU):
7819
  """Generic tags LU.
7820

7821
  This is an abstract class which is the parent of all the other tags LUs.
7822

7823
  """
7824

    
7825
  def ExpandNames(self):
7826
    self.needed_locks = {}
7827
    if self.op.kind == constants.TAG_NODE:
7828
      name = self.cfg.ExpandNodeName(self.op.name)
7829
      if name is None:
7830
        raise errors.OpPrereqError("Invalid node name (%s)" %
7831
                                   (self.op.name,))
7832
      self.op.name = name
7833
      self.needed_locks[locking.LEVEL_NODE] = name
7834
    elif self.op.kind == constants.TAG_INSTANCE:
7835
      name = self.cfg.ExpandInstanceName(self.op.name)
7836
      if name is None:
7837
        raise errors.OpPrereqError("Invalid instance name (%s)" %
7838
                                   (self.op.name,))
7839
      self.op.name = name
7840
      self.needed_locks[locking.LEVEL_INSTANCE] = name
7841

    
7842
  def CheckPrereq(self):
7843
    """Check prerequisites.
7844

7845
    """
7846
    if self.op.kind == constants.TAG_CLUSTER:
7847
      self.target = self.cfg.GetClusterInfo()
7848
    elif self.op.kind == constants.TAG_NODE:
7849
      self.target = self.cfg.GetNodeInfo(self.op.name)
7850
    elif self.op.kind == constants.TAG_INSTANCE:
7851
      self.target = self.cfg.GetInstanceInfo(self.op.name)
7852
    else:
7853
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
7854
                                 str(self.op.kind))
7855

    
7856

    
7857
class LUGetTags(TagsLU):
7858
  """Returns the tags of a given object.
7859

7860
  """
7861
  _OP_REQP = ["kind", "name"]
7862
  REQ_BGL = False
7863

    
7864
  def Exec(self, feedback_fn):
7865
    """Returns the tag list.
7866

7867
    """
7868
    return list(self.target.GetTags())
7869

    
7870

    
7871
class LUSearchTags(NoHooksLU):
7872
  """Searches the tags for a given pattern.
7873

7874
  """
7875
  _OP_REQP = ["pattern"]
7876
  REQ_BGL = False
7877

    
7878
  def ExpandNames(self):
7879
    self.needed_locks = {}
7880

    
7881
  def CheckPrereq(self):
7882
    """Check prerequisites.
7883

7884
    This checks the pattern passed for validity by compiling it.
7885

7886
    """
7887
    try:
7888
      self.re = re.compile(self.op.pattern)
7889
    except re.error, err:
7890
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
7891
                                 (self.op.pattern, err))
7892

    
7893
  def Exec(self, feedback_fn):
7894
    """Returns the tag list.
7895

7896
    """
7897
    cfg = self.cfg
7898
    tgts = [("/cluster", cfg.GetClusterInfo())]
7899
    ilist = cfg.GetAllInstancesInfo().values()
7900
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
7901
    nlist = cfg.GetAllNodesInfo().values()
7902
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
7903
    results = []
7904
    for path, target in tgts:
7905
      for tag in target.GetTags():
7906
        if self.re.search(tag):
7907
          results.append((path, tag))
7908
    return results
7909

    
7910

    
7911
class LUAddTags(TagsLU):
7912
  """Sets a tag on a given object.
7913

7914
  """
7915
  _OP_REQP = ["kind", "name", "tags"]
7916
  REQ_BGL = False
7917

    
7918
  def CheckPrereq(self):
7919
    """Check prerequisites.
7920

7921
    This checks the type and length of each tag.
7922

7923
    """
7924
    TagsLU.CheckPrereq(self)
7925
    for tag in self.op.tags:
7926
      objects.TaggableObject.ValidateTag(tag)
7927

    
7928
  def Exec(self, feedback_fn):
7929
    """Sets the tag.
7930

7931
    """
7932
    try:
7933
      for tag in self.op.tags:
7934
        self.target.AddTag(tag)
7935
    except errors.TagError, err:
7936
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
7937
    try:
7938
      self.cfg.Update(self.target)
7939
    except errors.ConfigurationError:
7940
      raise errors.OpRetryError("There has been a modification to the"
7941
                                " config file and the operation has been"
7942
                                " aborted. Please retry.")
7943

    
7944

    
7945
class LUDelTags(TagsLU):
7946
  """Delete a list of tags from a given object.
7947

7948
  """
7949
  _OP_REQP = ["kind", "name", "tags"]
7950
  REQ_BGL = False
7951

    
7952
  def CheckPrereq(self):
7953
    """Check prerequisites.
7954

7955
    This checks that we have the given tag.
7956

7957
    """
7958
    TagsLU.CheckPrereq(self)
7959
    for tag in self.op.tags:
7960
      objects.TaggableObject.ValidateTag(tag)
7961
    del_tags = frozenset(self.op.tags)
7962
    cur_tags = self.target.GetTags()
7963
    if not del_tags <= cur_tags:
7964
      diff_tags = del_tags - cur_tags
7965
      diff_names = ["'%s'" % tag for tag in diff_tags]
7966
      diff_names.sort()
7967
      raise errors.OpPrereqError("Tag(s) %s not found" %
7968
                                 (",".join(diff_names)))
7969

    
7970
  def Exec(self, feedback_fn):
7971
    """Remove the tag from the object.
7972

7973
    """
7974
    for tag in self.op.tags:
7975
      self.target.RemoveTag(tag)
7976
    try:
7977
      self.cfg.Update(self.target)
7978
    except errors.ConfigurationError:
7979
      raise errors.OpRetryError("There has been a modification to the"
7980
                                " config file and the operation has been"
7981
                                " aborted. Please retry.")
7982

    
7983

    
7984
class LUTestDelay(NoHooksLU):
7985
  """Sleep for a specified amount of time.
7986

7987
  This LU sleeps on the master and/or nodes for a specified amount of
7988
  time.
7989

7990
  """
7991
  _OP_REQP = ["duration", "on_master", "on_nodes"]
7992
  REQ_BGL = False
7993

    
7994
  def ExpandNames(self):
7995
    """Expand names and set required locks.
7996

7997
    This expands the node list, if any.
7998

7999
    """
8000
    self.needed_locks = {}
8001
    if self.op.on_nodes:
8002
      # _GetWantedNodes can be used here, but is not always appropriate to use
8003
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
8004
      # more information.
8005
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
8006
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
8007

    
8008
  def CheckPrereq(self):
8009
    """Check prerequisites.
8010

8011
    """
8012

    
8013
  def Exec(self, feedback_fn):
8014
    """Do the actual sleep.
8015

8016
    """
8017
    if self.op.on_master:
8018
      if not utils.TestDelay(self.op.duration):
8019
        raise errors.OpExecError("Error during master delay test")
8020
    if self.op.on_nodes:
8021
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
8022
      for node, node_result in result.items():
8023
        node_result.Raise("Failure during rpc call to node %s" % node)
8024

    
8025

    
8026
class IAllocator(object):
8027
  """IAllocator framework.
8028

8029
  An IAllocator instance has the following sets of attributes:
8030
    - cfg that is needed to query the cluster
8031
    - input data (all members of the _KEYS class attribute are required)
8032
    - four buffer attributes (in|out_data|text), that represent the
8033
      input (to the external script) in text and data structure format,
8034
      and the output from it, again in two formats
8035
    - the result variables from the script (success, info, nodes) for
8036
      easy usage
8037

8038
  """
8039
  _ALLO_KEYS = [
8040
    "mem_size", "disks", "disk_template",
8041
    "os", "tags", "nics", "vcpus", "hypervisor",
8042
    ]
8043
  _RELO_KEYS = [
8044
    "relocate_from",
8045
    ]
8046

    
8047
  def __init__(self, cfg, rpc, mode, name, **kwargs):
8048
    self.cfg = cfg
8049
    self.rpc = rpc
8050
    # init buffer variables
8051
    self.in_text = self.out_text = self.in_data = self.out_data = None
8052
    # init all input fields so that pylint is happy
8053
    self.mode = mode
8054
    self.name = name
8055
    self.mem_size = self.disks = self.disk_template = None
8056
    self.os = self.tags = self.nics = self.vcpus = None
8057
    self.hypervisor = None
8058
    self.relocate_from = None
8059
    # computed fields
8060
    self.required_nodes = None
8061
    # init result fields
8062
    self.success = self.info = self.nodes = None
8063
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8064
      keyset = self._ALLO_KEYS
8065
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8066
      keyset = self._RELO_KEYS
8067
    else:
8068
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8069
                                   " IAllocator" % self.mode)
8070
    for key in kwargs:
8071
      if key not in keyset:
8072
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
8073
                                     " IAllocator" % key)
8074
      setattr(self, key, kwargs[key])
8075
    for key in keyset:
8076
      if key not in kwargs:
8077
        raise errors.ProgrammerError("Missing input parameter '%s' to"
8078
                                     " IAllocator" % key)
8079
    self._BuildInputData()
8080

    
8081
  def _ComputeClusterData(self):
8082
    """Compute the generic allocator input data.
8083

8084
    This is the data that is independent of the actual operation.
8085

8086
    """
8087
    cfg = self.cfg
8088
    cluster_info = cfg.GetClusterInfo()
8089
    # cluster data
8090
    data = {
8091
      "version": constants.IALLOCATOR_VERSION,
8092
      "cluster_name": cfg.GetClusterName(),
8093
      "cluster_tags": list(cluster_info.GetTags()),
8094
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8095
      # we don't have job IDs
8096
      }
8097
    iinfo = cfg.GetAllInstancesInfo().values()
8098
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8099

    
8100
    # node data
8101
    node_results = {}
8102
    node_list = cfg.GetNodeList()
8103

    
8104
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8105
      hypervisor_name = self.hypervisor
8106
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8107
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8108

    
8109
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8110
                                        hypervisor_name)
8111
    node_iinfo = \
8112
      self.rpc.call_all_instances_info(node_list,
8113
                                       cluster_info.enabled_hypervisors)
8114
    for nname, nresult in node_data.items():
8115
      # first fill in static (config-based) values
8116
      ninfo = cfg.GetNodeInfo(nname)
8117
      pnr = {
8118
        "tags": list(ninfo.GetTags()),
8119
        "primary_ip": ninfo.primary_ip,
8120
        "secondary_ip": ninfo.secondary_ip,
8121
        "offline": ninfo.offline,
8122
        "drained": ninfo.drained,
8123
        "master_candidate": ninfo.master_candidate,
8124
        }
8125

    
8126
      if not (ninfo.offline or ninfo.drained):
8127
        nresult.Raise("Can't get data for node %s" % nname)
8128
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8129
                                nname)
8130
        remote_info = nresult.payload
8131

    
8132
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
8133
                     'vg_size', 'vg_free', 'cpu_total']:
8134
          if attr not in remote_info:
8135
            raise errors.OpExecError("Node '%s' didn't return attribute"
8136
                                     " '%s'" % (nname, attr))
8137
          if not isinstance(remote_info[attr], int):
8138
            raise errors.OpExecError("Node '%s' returned invalid value"
8139
                                     " for '%s': %s" %
8140
                                     (nname, attr, remote_info[attr]))
8141
        # compute memory used by primary instances
8142
        i_p_mem = i_p_up_mem = 0
8143
        for iinfo, beinfo in i_list:
8144
          if iinfo.primary_node == nname:
8145
            i_p_mem += beinfo[constants.BE_MEMORY]
8146
            if iinfo.name not in node_iinfo[nname].payload:
8147
              i_used_mem = 0
8148
            else:
8149
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8150
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8151
            remote_info['memory_free'] -= max(0, i_mem_diff)
8152

    
8153
            if iinfo.admin_up:
8154
              i_p_up_mem += beinfo[constants.BE_MEMORY]
8155

    
8156
        # compute memory used by instances
8157
        pnr_dyn = {
8158
          "total_memory": remote_info['memory_total'],
8159
          "reserved_memory": remote_info['memory_dom0'],
8160
          "free_memory": remote_info['memory_free'],
8161
          "total_disk": remote_info['vg_size'],
8162
          "free_disk": remote_info['vg_free'],
8163
          "total_cpus": remote_info['cpu_total'],
8164
          "i_pri_memory": i_p_mem,
8165
          "i_pri_up_memory": i_p_up_mem,
8166
          }
8167
        pnr.update(pnr_dyn)
8168

    
8169
      node_results[nname] = pnr
8170
    data["nodes"] = node_results
8171

    
8172
    # instance data
8173
    instance_data = {}
8174
    for iinfo, beinfo in i_list:
8175
      nic_data = []
8176
      for nic in iinfo.nics:
8177
        filled_params = objects.FillDict(
8178
            cluster_info.nicparams[constants.PP_DEFAULT],
8179
            nic.nicparams)
8180
        nic_dict = {"mac": nic.mac,
8181
                    "ip": nic.ip,
8182
                    "mode": filled_params[constants.NIC_MODE],
8183
                    "link": filled_params[constants.NIC_LINK],
8184
                   }
8185
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8186
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8187
        nic_data.append(nic_dict)
8188
      pir = {
8189
        "tags": list(iinfo.GetTags()),
8190
        "admin_up": iinfo.admin_up,
8191
        "vcpus": beinfo[constants.BE_VCPUS],
8192
        "memory": beinfo[constants.BE_MEMORY],
8193
        "os": iinfo.os,
8194
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8195
        "nics": nic_data,
8196
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8197
        "disk_template": iinfo.disk_template,
8198
        "hypervisor": iinfo.hypervisor,
8199
        }
8200
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8201
                                                 pir["disks"])
8202
      instance_data[iinfo.name] = pir
8203

    
8204
    data["instances"] = instance_data
8205

    
8206
    self.in_data = data
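    # Illustrative sketch (not part of the original code) of the structure
    # built above:
    #   {"version": ..., "cluster_name": ..., "cluster_tags": [...],
    #    "enabled_hypervisors": [...],
    #    "nodes": {node_name: {"tags": [...], "total_memory": ..., ...}},
    #    "instances": {inst_name: {"memory": ..., "disks": [...], ...}}}
    # _AddNewInstance/_AddRelocateInstance later add the "request" key.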
8207

    
8208
  def _AddNewInstance(self):
8209
    """Add new instance data to allocator structure.
8210

8211
    This in combination with _ComputeClusterData will create the
8212
    correct structure needed as input for the allocator.
8213

8214
    The checks for the completeness of the opcode must have already been
8215
    done.
8216

8217
    """
8218
    data = self.in_data
8219

    
8220
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8221

    
8222
    if self.disk_template in constants.DTS_NET_MIRROR:
8223
      self.required_nodes = 2
8224
    else:
8225
      self.required_nodes = 1
8226
    request = {
8227
      "type": "allocate",
8228
      "name": self.name,
8229
      "disk_template": self.disk_template,
8230
      "tags": self.tags,
8231
      "os": self.os,
8232
      "vcpus": self.vcpus,
8233
      "memory": self.mem_size,
8234
      "disks": self.disks,
8235
      "disk_space_total": disk_space,
8236
      "nics": self.nics,
8237
      "required_nodes": self.required_nodes,
8238
      }
8239
    data["request"] = request
8240

    
8241
  def _AddRelocateInstance(self):
8242
    """Add relocate instance data to allocator structure.
8243

8244
    This in combination with _ComputeClusterData will create the
8245
    correct structure needed as input for the allocator.
8246

8247
    The checks for the completeness of the opcode must have already been
8248
    done.
8249

8250
    """
8251
    instance = self.cfg.GetInstanceInfo(self.name)
8252
    if instance is None:
8253
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
8254
                                   " IAllocator" % self.name)
8255

    
8256
    if instance.disk_template not in constants.DTS_NET_MIRROR:
8257
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")
8258

    
8259
    if len(instance.secondary_nodes) != 1:
8260
      raise errors.OpPrereqError("Instance has not exactly one secondary node")
8261

    
8262
    self.required_nodes = 1
8263
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
8264
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
8265

    
8266
    request = {
8267
      "type": "relocate",
8268
      "name": self.name,
8269
      "disk_space_total": disk_space,
8270
      "required_nodes": self.required_nodes,
8271
      "relocate_from": self.relocate_from,
8272
      }
8273
    self.in_data["request"] = request
8274

    
8275
  def _BuildInputData(self):
8276
    """Build input data structures.
8277

8278
    """
8279
    self._ComputeClusterData()
8280

    
8281
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8282
      self._AddNewInstance()
8283
    else:
8284
      self._AddRelocateInstance()
8285

    
8286
    self.in_text = serializer.Dump(self.in_data)
8287

    
8288
  def Run(self, name, validate=True, call_fn=None):
8289
    """Run an instance allocator and return the results.
8290

8291
    """
8292
    if call_fn is None:
8293
      call_fn = self.rpc.call_iallocator_runner
8294

    
8295
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
8296
    result.Raise("Failure while running the iallocator script")
8297

    
8298
    self.out_text = result.payload
8299
    if validate:
8300
      self._ValidateResult()
8301

    
8302
  def _ValidateResult(self):
8303
    """Process the allocator results.
8304

8305
    This will process and if successful save the result in
8306
    self.out_data and the other parameters.
8307

8308
    """
8309
    try:
8310
      rdict = serializer.Load(self.out_text)
8311
    except Exception, err:
8312
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8313

    
8314
    if not isinstance(rdict, dict):
8315
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
8316

    
8317
    for key in "success", "info", "nodes":
8318
      if key not in rdict:
8319
        raise errors.OpExecError("Can't parse iallocator results:"
8320
                                 " missing key '%s'" % key)
8321
      setattr(self, key, rdict[key])
8322

    
8323
    if not isinstance(rdict["nodes"], list):
8324
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8325
                               " is not a list")
8326
    self.out_data = rdict
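    # Minimal well-formed reply accepted here (illustrative example, not in
    # the original code), as parsed by serializer.Load:
    #   {"success": true, "info": "allocation successful",
    #    "nodes": ["node1.example.com", "node2.example.com"]}
    # i.e. a JSON object with at least the "success", "info" and "nodes" keys,
    # where "nodes" must be a list.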
8327

    
8328

    
8329
class LUTestAllocator(NoHooksLU):
8330
  """Run allocator tests.
8331

8332
  This LU runs the allocator tests
8333

8334
  """
8335
  _OP_REQP = ["direction", "mode", "name"]
8336

    
8337
  def CheckPrereq(self):
8338
    """Check prerequisites.
8339

8340
    This checks the opcode parameters depending on the direction and mode.
8341

8342
    """
8343
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8344
      for attr in ["name", "mem_size", "disks", "disk_template",
8345
                   "os", "tags", "nics", "vcpus"]:
8346
        if not hasattr(self.op, attr):
8347
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
8348
                                     attr)
8349
      iname = self.cfg.ExpandInstanceName(self.op.name)
8350
      if iname is not None:
8351
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
8352
                                   iname)
8353
      if not isinstance(self.op.nics, list):
8354
        raise errors.OpPrereqError("Invalid parameter 'nics'")
8355
      for row in self.op.nics:
8356
        if (not isinstance(row, dict) or
8357
            "mac" not in row or
8358
            "ip" not in row or
8359
            "bridge" not in row):
8360
          raise errors.OpPrereqError("Invalid contents of the"
8361
                                     " 'nics' parameter")
8362
      if not isinstance(self.op.disks, list):
8363
        raise errors.OpPrereqError("Invalid parameter 'disks'")
8364
      for row in self.op.disks:
8365
        if (not isinstance(row, dict) or
8366
            "size" not in row or
8367
            not isinstance(row["size"], int) or
8368
            "mode" not in row or
8369
            row["mode"] not in ['r', 'w']):
8370
          raise errors.OpPrereqError("Invalid contents of the"
8371
                                     " 'disks' parameter")
8372
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
8373
        self.op.hypervisor = self.cfg.GetHypervisorType()
8374
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
8375
      if not hasattr(self.op, "name"):
8376
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
8377
      fname = self.cfg.ExpandInstanceName(self.op.name)
8378
      if fname is None:
8379
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
8380
                                   self.op.name)
8381
      self.op.name = fname
8382
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
8383
    else:
8384
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
8385
                                 self.op.mode)
8386

    
8387
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
8388
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
8389
        raise errors.OpPrereqError("Missing allocator name")
8390
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
8391
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
8392
                                 self.op.direction)
8393

    
8394
  def Exec(self, feedback_fn):
8395
    """Run the allocator test.
8396

8397
    """
8398
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8399
      ial = IAllocator(self.cfg, self.rpc,
8400
                       mode=self.op.mode,
8401
                       name=self.op.name,
8402
                       mem_size=self.op.mem_size,
8403
                       disks=self.op.disks,
8404
                       disk_template=self.op.disk_template,
8405
                       os=self.op.os,
8406
                       tags=self.op.tags,
8407
                       nics=self.op.nics,
8408
                       vcpus=self.op.vcpus,
8409
                       hypervisor=self.op.hypervisor,
8410
                       )
8411
    else:
8412
      ial = IAllocator(self.cfg, self.rpc,
8413
                       mode=self.op.mode,
8414
                       name=self.op.name,
8415
                       relocate_from=list(self.relocate_from),
8416
                       )
8417

    
8418
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
8419
      result = ial.in_text
8420
    else:
8421
      ial.Run(self.op.allocator, validate=False)
8422
      result = ial.out_text
8423
    return result