lib/cmdlib.py @ 76aef8fc


#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True
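
  # Illustrative sketch only (not part of the original code): a minimal
  # LU subclass following the rules above; the opcode name and its "message"
  # field are invented for the example.
  #
  #   class LUExampleNoop(LogicalUnit):
  #     HPATH = "example-noop"
  #     HTYPE = constants.HTYPE_CLUSTER
  #     _OP_REQP = ["message"]     # required opcode parameters
  #     REQ_BGL = False            # this LU declares its own locks
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}   # no locks needed
  #
  #     def BuildHooksEnv(self):
  #       env = {"OP_TARGET": self.cfg.GetClusterName()}
  #       return env, [], [self.cfg.GetMasterNode()]
  #
  #     def CheckPrereq(self):
  #       pass                     # nothing cluster-side to verify
  #
  #     def Exec(self, feedback_fn):
  #       feedback_fn(self.op.message)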

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods no longer need to worry about missing parameters.

    """
    pass
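
  # Illustrative sketch only: a typical CheckArguments override does purely
  # syntactic validation and defaulting of opcode fields; the "timeout" field
  # here is hypothetical.
  #
  #   def CheckArguments(self):
  #     timeout = getattr(self.op, "timeout", None)
  #     if timeout is None:
  #       timeout = 60
  #     if not isinstance(timeout, int) or timeout <= 0:
  #       raise errors.OpPrereqError("Invalid timeout '%s'" % timeout)
  #     self.op.timeout = timeout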

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have the 'GANETI_' prefix, as this
    will be handled in the hooks runner. Also note that additional
    keys will be added by the hooks runner. If the LU doesn't define
    any environment, an empty dict (and not None) should be returned.

    "No nodes" should be returned as an empty list (and not None).

    Note that if the HPATH for an LU class is None, this function will
    not be called.

    """
    raise NotImplementedError
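
  # Illustrative sketch only: a typical BuildHooksEnv implementation for an
  # instance-related LU (the exact env keys depend on the LU) returns the
  # environment dict plus the pre- and post-execution node lists.
  #
  #   def BuildHooksEnv(self):
  #     env = {"OP_TARGET": self.op.instance_name}
  #     nl = [self.cfg.GetMasterNode()]
  #     return env, nl, nl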

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can override it if it
    wants to use the local cluster hook scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result
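
  # Illustrative sketch only: an LU that wants to react to failing post-phase
  # hooks could override HooksCallBack along these lines (LUVerifyCluster
  # below does this for real).
  #
  #   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
  #     if phase == constants.HOOKS_PHASE_POST:
  #       for node_name in hook_results:
  #         if hook_results[node_name].fail_msg:
  #           feedback_fn("Hook failed on %s" % node_name)
  #           lu_result = False
  #     return lu_result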

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instances' nodes, or
    to just lock primary or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    A fuller commented example follows this method.

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]
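
  # Illustrative sketch only: how an instance-related LU typically combines
  # _ExpandAndLockInstance and _LockInstancesNodes; LOCKS_REPLACE tells the
  # helper to overwrite the node-level lock list computed in DeclareLocks.
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.needed_locks[locking.LEVEL_NODE] = []
  #     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
  #
  #   def DeclareLocks(self, level):
  #     if level == locking.LEVEL_NODE:
  #       self._LockInstancesNodes()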


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU;
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError
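
# Illustrative sketch only (not part of the original code): a minimal tasklet
# and the way an LU would wire tasklets up from ExpandNames.
#
#   class _ExampleTasklet(Tasklet):
#     def __init__(self, lu, instance_name):
#       Tasklet.__init__(self, lu)
#       self.instance_name = instance_name
#
#     def CheckPrereq(self):
#       if self.cfg.GetInstanceInfo(self.instance_name) is None:
#         raise errors.OpPrereqError("Instance '%s' not known" %
#                                    self.instance_name)
#
#     def Exec(self, feedback_fn):
#       feedback_fn("Processing %s" % self.instance_name)
#
#   # in some LU's ExpandNames:
#   #   self.tasklets = [_ExampleTasklet(self, name) for name in names]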


407
def _GetWantedNodes(lu, nodes):
408
  """Returns list of checked and expanded node names.
409

410
  @type lu: L{LogicalUnit}
411
  @param lu: the logical unit on whose behalf we execute
412
  @type nodes: list
413
  @param nodes: list of node names or None for all nodes
414
  @rtype: list
415
  @return: the list of nodes, sorted
416
  @raise errors.OpProgrammerError: if the nodes parameter is wrong type
417

418
  """
419
  if not isinstance(nodes, list):
420
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
421

    
422
  if not nodes:
423
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
424
      " non-empty list of nodes whose name is to be expanded.")
425

    
426
  wanted = []
427
  for name in nodes:
428
    node = lu.cfg.ExpandNodeName(name)
429
    if node is None:
430
      raise errors.OpPrereqError("No such node name '%s'" % name)
431
    wanted.append(node)
432

    
433
  return utils.NiceSort(wanted)
434

    
435

    
436
def _GetWantedInstances(lu, instances):
437
  """Returns list of checked and expanded instance names.
438

439
  @type lu: L{LogicalUnit}
440
  @param lu: the logical unit on whose behalf we execute
441
  @type instances: list
442
  @param instances: list of instance names or None for all instances
443
  @rtype: list
444
  @return: the list of instances, sorted
445
  @raise errors.OpPrereqError: if the instances parameter is wrong type
446
  @raise errors.OpPrereqError: if any of the passed instances is not found
447

448
  """
449
  if not isinstance(instances, list):
450
    raise errors.OpPrereqError("Invalid argument type 'instances'")
451

    
452
  if instances:
453
    wanted = []
454

    
455
    for name in instances:
456
      instance = lu.cfg.ExpandInstanceName(name)
457
      if instance is None:
458
        raise errors.OpPrereqError("No such instance name '%s'" % name)
459
      wanted.append(instance)
460

    
461
  else:
462
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
463
  return wanted
464

    
465

    
466
def _CheckOutputFields(static, dynamic, selected):
467
  """Checks whether all selected fields are valid.
468

469
  @type static: L{utils.FieldSet}
470
  @param static: static fields set
471
  @type dynamic: L{utils.FieldSet}
472
  @param dynamic: dynamic fields set
473

474
  """
475
  f = utils.FieldSet()
476
  f.Extend(static)
477
  f.Extend(dynamic)
478

    
479
  delta = f.NonMatching(selected)
480
  if delta:
481
    raise errors.OpPrereqError("Unknown output fields selected: %s"
482
                               % ",".join(delta))
483

    
484

    
485
def _CheckBooleanOpField(op, name):
486
  """Validates boolean opcode parameters.
487

488
  This will ensure that an opcode parameter is either a boolean value,
489
  or None (but that it always exists).
490

491
  """
492
  val = getattr(op, name, None)
493
  if not (val is None or isinstance(val, bool)):
494
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
495
                               (name, str(val)))
496
  setattr(op, name, val)
497

    
498

    
499
def _CheckNodeOnline(lu, node):
500
  """Ensure that a given node is online.
501

502
  @param lu: the LU on behalf of which we make the check
503
  @param node: the node to check
504
  @raise errors.OpPrereqError: if the node is offline
505

506
  """
507
  if lu.cfg.GetNodeInfo(node).offline:
508
    raise errors.OpPrereqError("Can't use offline node %s" % node)
509

    
510

    
511
def _CheckNodeNotDrained(lu, node):
512
  """Ensure that a given node is not drained.
513

514
  @param lu: the LU on behalf of which we make the check
515
  @param node: the node to check
516
  @raise errors.OpPrereqError: if the node is drained
517

518
  """
519
  if lu.cfg.GetNodeInfo(node).drained:
520
    raise errors.OpPrereqError("Can't use drained node %s" % node)
521

    
522

    
523
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
524
                          memory, vcpus, nics, disk_template, disks,
525
                          bep, hvp, hypervisor_name):
526
  """Builds instance related env variables for hooks
527

528
  This builds the hook environment from individual variables.
529

530
  @type name: string
531
  @param name: the name of the instance
532
  @type primary_node: string
533
  @param primary_node: the name of the instance's primary node
534
  @type secondary_nodes: list
535
  @param secondary_nodes: list of secondary nodes as strings
536
  @type os_type: string
537
  @param os_type: the name of the instance's OS
538
  @type status: boolean
539
  @param status: the should_run status of the instance
540
  @type memory: string
541
  @param memory: the memory size of the instance
542
  @type vcpus: string
543
  @param vcpus: the count of VCPUs the instance has
544
  @type nics: list
545
  @param nics: list of tuples (ip, mac, mode, link) representing
546
      the NICs the instance has
547
  @type disk_template: string
548
  @param disk_template: the disk template of the instance
549
  @type disks: list
550
  @param disks: the list of (size, mode) pairs
551
  @type bep: dict
552
  @param bep: the backend parameters for the instance
553
  @type hvp: dict
554
  @param hvp: the hypervisor parameters for the instance
555
  @type hypervisor_name: string
556
  @param hypervisor_name: the hypervisor for the instance
557
  @rtype: dict
558
  @return: the hook environment for this instance
559

560
  """
561
  if status:
562
    str_status = "up"
563
  else:
564
    str_status = "down"
565
  env = {
566
    "OP_TARGET": name,
567
    "INSTANCE_NAME": name,
568
    "INSTANCE_PRIMARY": primary_node,
569
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
570
    "INSTANCE_OS_TYPE": os_type,
571
    "INSTANCE_STATUS": str_status,
572
    "INSTANCE_MEMORY": memory,
573
    "INSTANCE_VCPUS": vcpus,
574
    "INSTANCE_DISK_TEMPLATE": disk_template,
575
    "INSTANCE_HYPERVISOR": hypervisor_name,
576
  }
577

    
578
  if nics:
579
    nic_count = len(nics)
580
    for idx, (ip, mac, mode, link) in enumerate(nics):
581
      if ip is None:
582
        ip = ""
583
      env["INSTANCE_NIC%d_IP" % idx] = ip
584
      env["INSTANCE_NIC%d_MAC" % idx] = mac
585
      env["INSTANCE_NIC%d_MODE" % idx] = mode
586
      env["INSTANCE_NIC%d_LINK" % idx] = link
587
      if mode == constants.NIC_MODE_BRIDGED:
588
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
589
  else:
590
    nic_count = 0
591

    
592
  env["INSTANCE_NIC_COUNT"] = nic_count
593

    
594
  if disks:
595
    disk_count = len(disks)
596
    for idx, (size, mode) in enumerate(disks):
597
      env["INSTANCE_DISK%d_SIZE" % idx] = size
598
      env["INSTANCE_DISK%d_MODE" % idx] = mode
599
  else:
600
    disk_count = 0
601

    
602
  env["INSTANCE_DISK_COUNT"] = disk_count
603

    
604
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
605
    for key, value in source.items():
606
      env["INSTANCE_%s_%s" % (kind, key)] = value
607

    
608
  return env
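
# Illustrative sketch only: for an instance with one bridged NIC and one disk
# the dict built above would contain entries along these lines (all values
# are made-up examples):
#
#   {
#     "OP_TARGET": "inst1.example.com", "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_SECONDARIES": "", "INSTANCE_STATUS": "up",
#     "INSTANCE_MEMORY": 128, "INSTANCE_VCPUS": 1,
#     "INSTANCE_NIC_COUNT": 1, "INSTANCE_NIC0_MODE": "bridged",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1, "INSTANCE_DISK0_SIZE": 1024,
#     "INSTANCE_DISK0_MODE": "rw",
#     # ... plus INSTANCE_BE_* and INSTANCE_HV_* entries
#   }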
609

    
610

    
611
def _NICListToTuple(lu, nics):
612
  """Build a list of nic information tuples.
613

614
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
615
  value in LUQueryInstanceData.
616

617
  @type lu:  L{LogicalUnit}
618
  @param lu: the logical unit on whose behalf we execute
619
  @type nics: list of L{objects.NIC}
620
  @param nics: list of nics to convert to hooks tuples
621

622
  """
623
  hooks_nics = []
624
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
625
  for nic in nics:
626
    ip = nic.ip
627
    mac = nic.mac
628
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
629
    mode = filled_params[constants.NIC_MODE]
630
    link = filled_params[constants.NIC_LINK]
631
    hooks_nics.append((ip, mac, mode, link))
632
  return hooks_nics
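
# Illustrative note: the resulting list holds one (ip, mac, mode, link) tuple
# per NIC, e.g. [("198.51.100.10", "aa:00:00:11:22:33", "bridged", "xen-br0")],
# with mode and link taken from the cluster-filled nicparams (example values).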
633

    
634

    
635
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
636
  """Builds instance related env variables for hooks from an object.
637

638
  @type lu: L{LogicalUnit}
639
  @param lu: the logical unit on whose behalf we execute
640
  @type instance: L{objects.Instance}
641
  @param instance: the instance for which we should build the
642
      environment
643
  @type override: dict
644
  @param override: dictionary with key/values that will override
645
      our values
646
  @rtype: dict
647
  @return: the hook environment dictionary
648

649
  """
650
  cluster = lu.cfg.GetClusterInfo()
651
  bep = cluster.FillBE(instance)
652
  hvp = cluster.FillHV(instance)
653
  args = {
654
    'name': instance.name,
655
    'primary_node': instance.primary_node,
656
    'secondary_nodes': instance.secondary_nodes,
657
    'os_type': instance.os,
658
    'status': instance.admin_up,
659
    'memory': bep[constants.BE_MEMORY],
660
    'vcpus': bep[constants.BE_VCPUS],
661
    'nics': _NICListToTuple(lu, instance.nics),
662
    'disk_template': instance.disk_template,
663
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
664
    'bep': bep,
665
    'hvp': hvp,
666
    'hypervisor_name': instance.hypervisor,
667
  }
668
  if override:
669
    args.update(override)
670
  return _BuildInstanceHookEnv(**args)
671

    
672

    
673
def _AdjustCandidatePool(lu):
674
  """Adjust the candidate pool after node operations.
675

676
  """
677
  mod_list = lu.cfg.MaintainCandidatePool()
678
  if mod_list:
679
    lu.LogInfo("Promoted nodes to master candidate role: %s",
680
               ", ".join(node.name for node in mod_list))
681
    for name in mod_list:
682
      lu.context.ReaddNode(name)
683
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
684
  if mc_now > mc_max:
685
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
686
               (mc_now, mc_max))
687

    
688

    
689
def _CheckNicsBridgesExist(lu, target_nics, target_node,
690
                               profile=constants.PP_DEFAULT):
691
  """Check that the brigdes needed by a list of nics exist.
692

693
  """
694
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
695
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
696
                for nic in target_nics]
697
  brlist = [params[constants.NIC_LINK] for params in paramslist
698
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
699
  if brlist:
700
    result = lu.rpc.call_bridges_exist(target_node, brlist)
701
    result.Raise("Error checking bridges on destination node '%s'" %
702
                 target_node, prereq=True)
703

    
704

    
705
def _CheckInstanceBridgesExist(lu, instance, node=None):
706
  """Check that the brigdes needed by an instance exist.
707

708
  """
709
  if node is None:
710
    node = instance.primary_node
711
  _CheckNicsBridgesExist(lu, instance.nics, node)
712

    
713

    
714
def _GetNodeInstancesInner(cfg, fn):
715
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
716

    
717

    
718
def _GetNodeInstances(cfg, node_name):
719
  """Returns a list of all primary and secondary instances on a node.
720

721
  """
722

    
723
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
724

    
725

    
726
def _GetNodePrimaryInstances(cfg, node_name):
727
  """Returns primary instances on a node.
728

729
  """
730
  return _GetNodeInstancesInner(cfg,
731
                                lambda inst: node_name == inst.primary_node)
732

    
733

    
734
def _GetNodeSecondaryInstances(cfg, node_name):
735
  """Returns secondary instances on a node.
736

737
  """
738
  return _GetNodeInstancesInner(cfg,
739
                                lambda inst: node_name in inst.secondary_nodes)
740

    
741

    
742
def _GetStorageTypeArgs(cfg, storage_type):
743
  """Returns the arguments for a storage type.
744

745
  """
746
  # Special case for file storage
747
  if storage_type == constants.ST_FILE:
748
    # storage.FileStorage wants a list of storage directories
749
    return [[cfg.GetFileStorageDir()]]
750

    
751
  return []
752

    
753

    
754
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
755
  faulty = []
756

    
757
  for dev in instance.disks:
758
    cfg.SetDiskID(dev, node_name)
759

    
760
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
761
  result.Raise("Failed to get disk status from node %s" % node_name,
762
               prereq=prereq)
763

    
764
  for idx, bdev_status in enumerate(result.payload):
765
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
766
      faulty.append(idx)
767

    
768
  return faulty
769

    
770

    
771
class LUPostInitCluster(LogicalUnit):
772
  """Logical unit for running hooks after cluster initialization.
773

774
  """
775
  HPATH = "cluster-init"
776
  HTYPE = constants.HTYPE_CLUSTER
777
  _OP_REQP = []
778

    
779
  def BuildHooksEnv(self):
780
    """Build hooks env.
781

782
    """
783
    env = {"OP_TARGET": self.cfg.GetClusterName()}
784
    mn = self.cfg.GetMasterNode()
785
    return env, [], [mn]
786

    
787
  def CheckPrereq(self):
788
    """No prerequisites to check.
789

790
    """
791
    return True
792

    
793
  def Exec(self, feedback_fn):
794
    """Nothing to do.
795

796
    """
797
    return True
798

    
799

    
800
class LUDestroyCluster(NoHooksLU):
801
  """Logical unit for destroying the cluster.
802

803
  """
804
  _OP_REQP = []
805

    
806
  def CheckPrereq(self):
807
    """Check prerequisites.
808

809
    This checks whether the cluster is empty.
810

811
    Any errors are signaled by raising errors.OpPrereqError.
812

813
    """
814
    master = self.cfg.GetMasterNode()
815

    
816
    nodelist = self.cfg.GetNodeList()
817
    if len(nodelist) != 1 or nodelist[0] != master:
818
      raise errors.OpPrereqError("There are still %d node(s) in"
819
                                 " this cluster." % (len(nodelist) - 1))
820
    instancelist = self.cfg.GetInstanceList()
821
    if instancelist:
822
      raise errors.OpPrereqError("There are still %d instance(s) in"
823
                                 " this cluster." % len(instancelist))
824

    
825
  def Exec(self, feedback_fn):
826
    """Destroys the cluster.
827

828
    """
829
    master = self.cfg.GetMasterNode()
830
    result = self.rpc.call_node_stop_master(master, False)
831
    result.Raise("Could not disable the master role")
832
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
833
    utils.CreateBackup(priv_key)
834
    utils.CreateBackup(pub_key)
835
    return master
836

    
837

    
838
class LUVerifyCluster(LogicalUnit):
839
  """Verifies the cluster status.
840

841
  """
842
  HPATH = "cluster-verify"
843
  HTYPE = constants.HTYPE_CLUSTER
844
  _OP_REQP = ["skip_checks"]
845
  REQ_BGL = False
846

    
847
  def ExpandNames(self):
848
    self.needed_locks = {
849
      locking.LEVEL_NODE: locking.ALL_SET,
850
      locking.LEVEL_INSTANCE: locking.ALL_SET,
851
    }
852
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
853

    
854
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
855
                  node_result, feedback_fn, master_files,
856
                  drbd_map, vg_name):
857
    """Run multiple tests against a node.
858

859
    Test list:
860

861
      - compares ganeti version
862
      - checks vg existence and size > 20G
863
      - checks config file checksum
864
      - checks ssh to other nodes
865

866
    @type nodeinfo: L{objects.Node}
867
    @param nodeinfo: the node to check
868
    @param file_list: required list of files
869
    @param local_cksum: dictionary of local files and their checksums
870
    @param node_result: the results from the node
871
    @param feedback_fn: function used to accumulate results
872
    @param master_files: list of files that only masters should have
873
    @param drbd_map: the used DRBD minors for this node, in
874
        form of minor: (instance, must_exist) which correspond to instances
875
        and their running status
876
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
877

878
    """
879
    node = nodeinfo.name
880

    
881
    # main result, node_result should be a non-empty dict
882
    if not node_result or not isinstance(node_result, dict):
883
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
884
      return True
885

    
886
    # compares ganeti version
887
    local_version = constants.PROTOCOL_VERSION
888
    remote_version = node_result.get('version', None)
889
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
890
            len(remote_version) == 2):
891
      feedback_fn("  - ERROR: connection to %s failed" % (node))
892
      return True
893

    
894
    if local_version != remote_version[0]:
895
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
896
                  " node %s %s" % (local_version, node, remote_version[0]))
897
      return True
898

    
899
    # node seems compatible, we can actually try to look into its results
900

    
901
    bad = False
902

    
903
    # full package version
904
    if constants.RELEASE_VERSION != remote_version[1]:
905
      feedback_fn("  - WARNING: software version mismatch: master %s,"
906
                  " node %s %s" %
907
                  (constants.RELEASE_VERSION, node, remote_version[1]))
908

    
909
    # checks vg existence and size > 20G
910
    if vg_name is not None:
911
      vglist = node_result.get(constants.NV_VGLIST, None)
912
      if not vglist:
913
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
914
                        (node,))
915
        bad = True
916
      else:
917
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
918
                                              constants.MIN_VG_SIZE)
919
        if vgstatus:
920
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
921
          bad = True
922

    
923
    # checks config file checksum
924

    
925
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
926
    if not isinstance(remote_cksum, dict):
927
      bad = True
928
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
929
    else:
930
      for file_name in file_list:
931
        node_is_mc = nodeinfo.master_candidate
932
        must_have_file = file_name not in master_files
933
        if file_name not in remote_cksum:
934
          if node_is_mc or must_have_file:
935
            bad = True
936
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
937
        elif remote_cksum[file_name] != local_cksum[file_name]:
938
          if node_is_mc or must_have_file:
939
            bad = True
940
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
941
          else:
942
            # not candidate and this is not a must-have file
943
            bad = True
944
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
945
                        " candidates (and the file is outdated)" % file_name)
946
        else:
947
          # all good, except non-master/non-must have combination
948
          if not node_is_mc and not must_have_file:
949
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
950
                        " candidates" % file_name)
951

    
952
    # checks ssh to any
953

    
954
    if constants.NV_NODELIST not in node_result:
955
      bad = True
956
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
957
    else:
958
      if node_result[constants.NV_NODELIST]:
959
        bad = True
960
        for node in node_result[constants.NV_NODELIST]:
961
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
962
                          (node, node_result[constants.NV_NODELIST][node]))
963

    
964
    if constants.NV_NODENETTEST not in node_result:
965
      bad = True
966
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
967
    else:
968
      if node_result[constants.NV_NODENETTEST]:
969
        bad = True
970
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
971
        for node in nlist:
972
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
973
                          (node, node_result[constants.NV_NODENETTEST][node]))
974

    
975
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
976
    if isinstance(hyp_result, dict):
977
      for hv_name, hv_result in hyp_result.iteritems():
978
        if hv_result is not None:
979
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
980
                      (hv_name, hv_result))
981

    
982
    # check used drbd list
983
    if vg_name is not None:
984
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
985
      if not isinstance(used_minors, (tuple, list)):
986
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
987
                    str(used_minors))
988
      else:
989
        for minor, (iname, must_exist) in drbd_map.items():
990
          if minor not in used_minors and must_exist:
991
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
992
                        " not active" % (minor, iname))
993
            bad = True
994
        for minor in used_minors:
995
          if minor not in drbd_map:
996
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
997
                        minor)
998
            bad = True
999

    
1000
    return bad
1001

    
1002
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1003
                      node_instance, feedback_fn, n_offline):
1004
    """Verify an instance.
1005

1006
    This function checks to see if the required block devices are
1007
    available on the instance's node.
1008

1009
    """
1010
    bad = False
1011

    
1012
    node_current = instanceconfig.primary_node
1013

    
1014
    node_vol_should = {}
1015
    instanceconfig.MapLVsByNode(node_vol_should)
1016

    
1017
    for node in node_vol_should:
1018
      if node in n_offline:
1019
        # ignore missing volumes on offline nodes
1020
        continue
1021
      for volume in node_vol_should[node]:
1022
        if node not in node_vol_is or volume not in node_vol_is[node]:
1023
          feedback_fn("  - ERROR: volume %s missing on node %s" %
1024
                          (volume, node))
1025
          bad = True
1026

    
1027
    if instanceconfig.admin_up:
1028
      if ((node_current not in node_instance or
1029
          not instance in node_instance[node_current]) and
1030
          node_current not in n_offline):
1031
        feedback_fn("  - ERROR: instance %s not running on node %s" %
1032
                        (instance, node_current))
1033
        bad = True
1034

    
1035
    for node in node_instance:
1036
      if (not node == node_current):
1037
        if instance in node_instance[node]:
1038
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
1039
                          (instance, node))
1040
          bad = True
1041

    
1042
    return bad
1043

    
1044
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
1045
    """Verify if there are any unknown volumes in the cluster.
1046

1047
    The .os, .swap and backup volumes are ignored. All other volumes are
1048
    reported as unknown.
1049

1050
    """
1051
    bad = False
1052

    
1053
    for node in node_vol_is:
1054
      for volume in node_vol_is[node]:
1055
        if node not in node_vol_should or volume not in node_vol_should[node]:
1056
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
1057
                      (volume, node))
1058
          bad = True
1059
    return bad
1060

    
1061
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
1062
    """Verify the list of running instances.
1063

1064
    This checks what instances are running but unknown to the cluster.
1065

1066
    """
1067
    bad = False
1068
    for node in node_instance:
1069
      for runninginstance in node_instance[node]:
1070
        if runninginstance not in instancelist:
1071
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
1072
                          (runninginstance, node))
1073
          bad = True
1074
    return bad
1075

    
1076
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
1077
    """Verify N+1 Memory Resilience.
1078

1079
    Check that if one single node dies we can still start all the instances it
1080
    was primary for.
1081

1082
    """
1083
    bad = False
1084

    
1085
    for node, nodeinfo in node_info.iteritems():
1086
      # This code checks that every node which is now listed as secondary has
1087
      # enough memory to host all instances it is supposed to should a single
1088
      # other node in the cluster fail.
1089
      # FIXME: not ready for failover to an arbitrary node
1090
      # FIXME: does not support file-backed instances
1091
      # WARNING: we currently take into account down instances as well as up
1092
      # ones, considering that even if they're down someone might want to start
1093
      # them even in the event of a node failure.
1094
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1095
        needed_mem = 0
1096
        for instance in instances:
1097
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1098
          if bep[constants.BE_AUTO_BALANCE]:
1099
            needed_mem += bep[constants.BE_MEMORY]
1100
        if nodeinfo['mfree'] < needed_mem:
1101
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
1102
                      " failovers should node %s fail" % (node, prinode))
1103
          bad = True
1104
    return bad
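
  # Illustrative sketch only: for the check above, node_info[node] carries a
  # structure along these lines (names and numbers are examples), so the inner
  # loop sums the memory of all instances that would fail over from a single
  # primary node onto this one:
  #
  #   node_info["node2"] = {
  #     "mfree": 2048,
  #     "pinst": ["inst3"],
  #     "sinst": ["inst1", "inst2"],
  #     "sinst-by-pnode": {"node1": ["inst1", "inst2"]},
  #   }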
1105

    
1106
  def CheckPrereq(self):
1107
    """Check prerequisites.
1108

1109
    Transform the list of checks we're going to skip into a set and check that
1110
    all its members are valid.
1111

1112
    """
1113
    self.skip_set = frozenset(self.op.skip_checks)
1114
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1115
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1116

    
1117
  def BuildHooksEnv(self):
1118
    """Build hooks env.
1119

1120
    Cluster-Verify hooks are run only in the post phase; if they fail, their
    output is logged in the verify output and the verification fails.
1122

1123
    """
1124
    all_nodes = self.cfg.GetNodeList()
1125
    env = {
1126
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1127
      }
1128
    for node in self.cfg.GetAllNodesInfo().values():
1129
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1130

    
1131
    return env, [], all_nodes
1132

    
1133
  def Exec(self, feedback_fn):
1134
    """Verify integrity of cluster, performing various test on nodes.
1135

1136
    """
1137
    bad = False
1138
    feedback_fn("* Verifying global settings")
1139
    for msg in self.cfg.VerifyConfig():
1140
      feedback_fn("  - ERROR: %s" % msg)
1141

    
1142
    vg_name = self.cfg.GetVGName()
1143
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1144
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1145
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1146
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1147
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1148
                        for iname in instancelist)
1149
    i_non_redundant = [] # Non redundant instances
1150
    i_non_a_balanced = [] # Non auto-balanced instances
1151
    n_offline = [] # List of offline nodes
1152
    n_drained = [] # List of nodes being drained
1153
    node_volume = {}
1154
    node_instance = {}
1155
    node_info = {}
1156
    instance_cfg = {}
1157

    
1158
    # FIXME: verify OS list
1159
    # do local checksums
1160
    master_files = [constants.CLUSTER_CONF_FILE]
1161

    
1162
    file_names = ssconf.SimpleStore().GetFileList()
1163
    file_names.append(constants.SSL_CERT_FILE)
1164
    file_names.append(constants.RAPI_CERT_FILE)
1165
    file_names.extend(master_files)
1166

    
1167
    local_checksums = utils.FingerprintFiles(file_names)
1168

    
1169
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1170
    node_verify_param = {
1171
      constants.NV_FILELIST: file_names,
1172
      constants.NV_NODELIST: [node.name for node in nodeinfo
1173
                              if not node.offline],
1174
      constants.NV_HYPERVISOR: hypervisors,
1175
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1176
                                  node.secondary_ip) for node in nodeinfo
1177
                                 if not node.offline],
1178
      constants.NV_INSTANCELIST: hypervisors,
1179
      constants.NV_VERSION: None,
1180
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1181
      }
1182
    if vg_name is not None:
1183
      node_verify_param[constants.NV_VGLIST] = None
1184
      node_verify_param[constants.NV_LVLIST] = vg_name
1185
      node_verify_param[constants.NV_DRBDLIST] = None
1186
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1187
                                           self.cfg.GetClusterName())
1188

    
1189
    cluster = self.cfg.GetClusterInfo()
1190
    master_node = self.cfg.GetMasterNode()
1191
    all_drbd_map = self.cfg.ComputeDRBDMap()
1192

    
1193
    for node_i in nodeinfo:
1194
      node = node_i.name
1195

    
1196
      if node_i.offline:
1197
        feedback_fn("* Skipping offline node %s" % (node,))
1198
        n_offline.append(node)
1199
        continue
1200

    
1201
      if node == master_node:
1202
        ntype = "master"
1203
      elif node_i.master_candidate:
1204
        ntype = "master candidate"
1205
      elif node_i.drained:
1206
        ntype = "drained"
1207
        n_drained.append(node)
1208
      else:
1209
        ntype = "regular"
1210
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1211

    
1212
      msg = all_nvinfo[node].fail_msg
1213
      if msg:
1214
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1215
        bad = True
1216
        continue
1217

    
1218
      nresult = all_nvinfo[node].payload
1219
      node_drbd = {}
1220
      for minor, instance in all_drbd_map[node].items():
1221
        if instance not in instanceinfo:
1222
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1223
                      instance)
1224
          # ghost instance should not be running, but otherwise we
1225
          # don't give double warnings (both ghost instance and
1226
          # unallocated minor in use)
1227
          node_drbd[minor] = (instance, False)
1228
        else:
1229
          instance = instanceinfo[instance]
1230
          node_drbd[minor] = (instance.name, instance.admin_up)
1231
      result = self._VerifyNode(node_i, file_names, local_checksums,
1232
                                nresult, feedback_fn, master_files,
1233
                                node_drbd, vg_name)
1234
      bad = bad or result
1235

    
1236
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1237
      if vg_name is None:
1238
        node_volume[node] = {}
1239
      elif isinstance(lvdata, basestring):
1240
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1241
                    (node, utils.SafeEncode(lvdata)))
1242
        bad = True
1243
        node_volume[node] = {}
1244
      elif not isinstance(lvdata, dict):
1245
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1246
        bad = True
1247
        continue
1248
      else:
1249
        node_volume[node] = lvdata
1250

    
1251
      # node_instance
1252
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1253
      if not isinstance(idata, list):
1254
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1255
                    (node,))
1256
        bad = True
1257
        continue
1258

    
1259
      node_instance[node] = idata
1260

    
1261
      # node_info
1262
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1263
      if not isinstance(nodeinfo, dict):
1264
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1265
        bad = True
1266
        continue
1267

    
1268
      try:
1269
        node_info[node] = {
1270
          "mfree": int(nodeinfo['memory_free']),
1271
          "pinst": [],
1272
          "sinst": [],
1273
          # dictionary holding all instances this node is secondary for,
1274
          # grouped by their primary node. Each key is a cluster node, and each
1275
          # value is a list of instances which have the key as primary and the
1276
          # current node as secondary.  this is handy to calculate N+1 memory
1277
          # availability if you can only failover from a primary to its
1278
          # secondary.
1279
          "sinst-by-pnode": {},
1280
        }
1281
        # FIXME: devise a free space model for file based instances as well
1282
        if vg_name is not None:
1283
          if (constants.NV_VGLIST not in nresult or
1284
              vg_name not in nresult[constants.NV_VGLIST]):
1285
            feedback_fn("  - ERROR: node %s didn't return data for the"
1286
                        " volume group '%s' - it is either missing or broken" %
1287
                        (node, vg_name))
1288
            bad = True
1289
            continue
1290
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1291
      except (ValueError, KeyError):
1292
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1293
                    " from node %s" % (node,))
1294
        bad = True
1295
        continue
1296

    
1297
    node_vol_should = {}
1298

    
1299
    for instance in instancelist:
1300
      feedback_fn("* Verifying instance %s" % instance)
1301
      inst_config = instanceinfo[instance]
1302
      result =  self._VerifyInstance(instance, inst_config, node_volume,
1303
                                     node_instance, feedback_fn, n_offline)
1304
      bad = bad or result
1305
      inst_nodes_offline = []
1306

    
1307
      inst_config.MapLVsByNode(node_vol_should)
1308

    
1309
      instance_cfg[instance] = inst_config
1310

    
1311
      pnode = inst_config.primary_node
1312
      if pnode in node_info:
1313
        node_info[pnode]['pinst'].append(instance)
1314
      elif pnode not in n_offline:
1315
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1316
                    " %s failed" % (instance, pnode))
1317
        bad = True
1318

    
1319
      if pnode in n_offline:
1320
        inst_nodes_offline.append(pnode)
1321

    
1322
      # If the instance is non-redundant we cannot survive losing its primary
1323
      # node, so we are not N+1 compliant. On the other hand we have no disk
1324
      # templates with more than one secondary so that situation is not well
1325
      # supported either.
1326
      # FIXME: does not support file-backed instances
1327
      if len(inst_config.secondary_nodes) == 0:
1328
        i_non_redundant.append(instance)
1329
      elif len(inst_config.secondary_nodes) > 1:
1330
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1331
                    % instance)
1332

    
1333
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1334
        i_non_a_balanced.append(instance)
1335

    
1336
      for snode in inst_config.secondary_nodes:
1337
        if snode in node_info:
1338
          node_info[snode]['sinst'].append(instance)
1339
          if pnode not in node_info[snode]['sinst-by-pnode']:
1340
            node_info[snode]['sinst-by-pnode'][pnode] = []
1341
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1342
        elif snode not in n_offline:
1343
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1344
                      " %s failed" % (instance, snode))
1345
          bad = True
1346
        if snode in n_offline:
1347
          inst_nodes_offline.append(snode)
1348

    
1349
      if inst_nodes_offline:
1350
        # warn that the instance lives on offline nodes, and set bad=True
1351
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1352
                    ", ".join(inst_nodes_offline))
1353
        bad = True
1354

    
1355
    feedback_fn("* Verifying orphan volumes")
1356
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1357
                                       feedback_fn)
1358
    bad = bad or result
1359

    
1360
    feedback_fn("* Verifying remaining instances")
1361
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1362
                                         feedback_fn)
1363
    bad = bad or result
1364

    
1365
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1366
      feedback_fn("* Verifying N+1 Memory redundancy")
1367
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1368
      bad = bad or result
1369

    
1370
    feedback_fn("* Other Notes")
1371
    if i_non_redundant:
1372
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1373
                  % len(i_non_redundant))
1374

    
1375
    if i_non_a_balanced:
1376
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1377
                  % len(i_non_a_balanced))
1378

    
1379
    if n_offline:
1380
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1381

    
1382
    if n_drained:
1383
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1384

    
1385
    return not bad
1386

    
1387
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1388
    """Analyze the post-hooks' result
1389

1390
    This method analyses the hook result, handles it, and sends some
1391
    nicely-formatted feedback back to the user.
1392

1393
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1394
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1395
    @param hooks_results: the results of the multi-node hooks rpc call
1396
    @param feedback_fn: function used to send feedback back to the caller
1397
    @param lu_result: previous Exec result
1398
    @return: the new Exec result, based on the previous result
1399
        and hook results
1400

1401
    """
1402
    # We only really run POST phase hooks, and are only interested in
1403
    # their results
1404
    if phase == constants.HOOKS_PHASE_POST:
1405
      # Used to change hooks' output to proper indentation
1406
      indent_re = re.compile('^', re.M)
1407
      feedback_fn("* Hooks Results")
1408
      if not hooks_results:
1409
        feedback_fn("  - ERROR: general communication failure")
1410
        lu_result = 1
1411
      else:
1412
        for node_name in hooks_results:
1413
          show_node_header = True
1414
          res = hooks_results[node_name]
1415
          msg = res.fail_msg
1416
          if msg:
1417
            if res.offline:
1418
              # no need to warn or set fail return value
1419
              continue
1420
            feedback_fn("    Communication failure in hooks execution: %s" %
1421
                        msg)
1422
            lu_result = 1
1423
            continue
1424
          for script, hkr, output in res.payload:
1425
            if hkr == constants.HKR_FAIL:
1426
              # The node header is only shown once, if there are
1427
              # failing hooks on that node
1428
              if show_node_header:
1429
                feedback_fn("  Node %s:" % node_name)
1430
                show_node_header = False
1431
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1432
              output = indent_re.sub('      ', output)
1433
              feedback_fn("%s" % output)
1434
              lu_result = 1
1435

    
1436
      return lu_result
1437

    
1438

    
1439
class LUVerifyDisks(NoHooksLU):
1440
  """Verifies the cluster disks status.
1441

1442
  """
1443
  _OP_REQP = []
1444
  REQ_BGL = False
1445

    
1446
  def ExpandNames(self):
1447
    self.needed_locks = {
1448
      locking.LEVEL_NODE: locking.ALL_SET,
1449
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1450
    }
1451
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1452

    
1453
  def CheckPrereq(self):
1454
    """Check prerequisites.
1455

1456
    This has no prerequisites.
1457

1458
    """
1459
    pass
1460

    
1461
  def Exec(self, feedback_fn):
1462
    """Verify integrity of cluster disks.
1463

1464
    @rtype: tuple of three items
1465
    @return: a tuple of (dict of node-to-node_error, list of instances
1466
        which need activate-disks, dict of instance: (node, volume) for
1467
        missing volumes)
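
    Illustrative example of the return value (the node, instance and
    volume names below are hypothetical)::

      ({"node2": "error enumerating LVs"},
       ["instance3"],
       {"instance5": [("node1", "xenvg/lv_data")]})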
1468

1469
    """
1470
    result = res_nodes, res_instances, res_missing = {}, [], {}
1471

    
1472
    vg_name = self.cfg.GetVGName()
1473
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1474
    instances = [self.cfg.GetInstanceInfo(name)
1475
                 for name in self.cfg.GetInstanceList()]
1476

    
1477
    nv_dict = {}
1478
    for inst in instances:
1479
      inst_lvs = {}
1480
      if (not inst.admin_up or
1481
          inst.disk_template not in constants.DTS_NET_MIRROR):
1482
        continue
1483
      inst.MapLVsByNode(inst_lvs)
1484
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1485
      for node, vol_list in inst_lvs.iteritems():
1486
        for vol in vol_list:
1487
          nv_dict[(node, vol)] = inst
1488

    
1489
    if not nv_dict:
1490
      return result
1491

    
1492
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1493

    
1494
    for node in nodes:
1495
      # node_volume
1496
      node_res = node_lvs[node]
1497
      if node_res.offline:
1498
        continue
1499
      msg = node_res.fail_msg
1500
      if msg:
1501
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1502
        res_nodes[node] = msg
1503
        continue
1504

    
1505
      lvs = node_res.payload
1506
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1507
        inst = nv_dict.pop((node, lv_name), None)
1508
        if (not lv_online and inst is not None
1509
            and inst.name not in res_instances):
1510
          res_instances.append(inst.name)
1511

    
1512
    # any leftover items in nv_dict are missing LVs, let's arrange the
1513
    # data better
1514
    for key, inst in nv_dict.iteritems():
1515
      if inst.name not in res_missing:
1516
        res_missing[inst.name] = []
1517
      res_missing[inst.name].append(key)
1518

    
1519
    return result
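
# Illustrative sketch (not part of the module; variable and function names
# are hypothetical): a caller could unpack the LUVerifyDisks.Exec() result
# like this:
#
#   node_errors, need_activate, missing = lu_result
#   for iname in need_activate:
#     feedback("instance %s needs activate-disks" % iname)
#   for iname, vols in missing.items():
#     for node, vol in vols:
#       feedback("instance %s: volume %s missing on node %s" %
#                (iname, vol, node))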
1520

    
1521

    
1522
class LURepairDiskSizes(NoHooksLU):
1523
  """Verifies the cluster disks sizes.
1524

1525
  """
1526
  _OP_REQP = ["instances"]
1527
  REQ_BGL = False
1528

    
1529
  def ExpandNames(self):
1530

    
1531
    if not isinstance(self.op.instances, list):
1532
      raise errors.OpPrereqError("Invalid argument type 'instances'")
1533

    
1534
    if self.op.instances:
1535
      self.wanted_names = []
1536
      for name in self.op.instances:
1537
        full_name = self.cfg.ExpandInstanceName(name)
1538
        if full_name is None:
1539
          raise errors.OpPrereqError("Instance '%s' not known" % name)
1540
        self.wanted_names.append(full_name)
1542
      self.needed_locks = {
1543
        locking.LEVEL_NODE: [],
1544
        locking.LEVEL_INSTANCE: self.wanted_names,
1545
        }
1546
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1547
    else:
1548
      self.wanted_names = None
1549
      self.needed_locks = {
1550
        locking.LEVEL_NODE: locking.ALL_SET,
1551
        locking.LEVEL_INSTANCE: locking.ALL_SET,
1552
        }
1553
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1554

    
1555
  def DeclareLocks(self, level):
1556
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1557
      self._LockInstancesNodes(primary_only=True)
1558

    
1559
  def CheckPrereq(self):
1560
    """Check prerequisites.
1561

1562
    This only checks the optional instance list against the existing names.
1563

1564
    """
1565
    if self.wanted_names is None:
1566
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1567

    
1568
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1569
                             in self.wanted_names]
1570

    
1571
  def Exec(self, feedback_fn):
1572
    """Verify the size of cluster disks.
1573

1574
    """
1575
    # TODO: check child disks too
1576
    # TODO: check differences in size between primary/secondary nodes
1577
    per_node_disks = {}
1578
    for instance in self.wanted_instances:
1579
      pnode = instance.primary_node
1580
      if pnode not in per_node_disks:
1581
        per_node_disks[pnode] = []
1582
      for idx, disk in enumerate(instance.disks):
1583
        per_node_disks[pnode].append((instance, idx, disk))
1584

    
1585
    changed = []
1586
    for node, dskl in per_node_disks.items():
1587
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
1588
      if result.failed:
1589
        self.LogWarning("Failure in blockdev_getsizes call to node"
1590
                        " %s, ignoring", node)
1591
        continue
1592
      if len(result.data) != len(dskl):
1593
        self.LogWarning("Invalid result from node %s, ignoring node results",
1594
                        node)
1595
        continue
1596
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1597
        if size is None:
1598
          self.LogWarning("Disk %d of instance %s did not return size"
1599
                          " information, ignoring", idx, instance.name)
1600
          continue
1601
        if not isinstance(size, (int, long)):
1602
          self.LogWarning("Disk %d of instance %s did not return valid"
1603
                          " size information, ignoring", idx, instance.name)
1604
          continue
1605
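        # the node reports disk sizes in bytes, while the configuration
        # stores them in MiB; convert before comparing with disk.size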
        size = size >> 20
1606
        if size != disk.size:
1607
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1608
                       " correcting: recorded %d, actual %d", idx,
1609
                       instance.name, disk.size, size)
1610
          disk.size = size
1611
          self.cfg.Update(instance)
1612
          changed.append((instance.name, idx, size))
1613
    return changed
1614

    
1615

    
1616
class LURenameCluster(LogicalUnit):
1617
  """Rename the cluster.
1618

1619
  """
1620
  HPATH = "cluster-rename"
1621
  HTYPE = constants.HTYPE_CLUSTER
1622
  _OP_REQP = ["name"]
1623

    
1624
  def BuildHooksEnv(self):
1625
    """Build hooks env.
1626

1627
    """
1628
    env = {
1629
      "OP_TARGET": self.cfg.GetClusterName(),
1630
      "NEW_NAME": self.op.name,
1631
      }
1632
    mn = self.cfg.GetMasterNode()
1633
    return env, [mn], [mn]
1634

    
1635
  def CheckPrereq(self):
1636
    """Verify that the passed name is a valid one.
1637

1638
    """
1639
    hostname = utils.HostInfo(self.op.name)
1640

    
1641
    new_name = hostname.name
1642
    self.ip = new_ip = hostname.ip
1643
    old_name = self.cfg.GetClusterName()
1644
    old_ip = self.cfg.GetMasterIP()
1645
    if new_name == old_name and new_ip == old_ip:
1646
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1647
                                 " cluster has changed")
1648
    if new_ip != old_ip:
1649
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1650
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1651
                                   " reachable on the network. Aborting." %
1652
                                   new_ip)
1653

    
1654
    self.op.name = new_name
1655

    
1656
  def Exec(self, feedback_fn):
1657
    """Rename the cluster.
1658

1659
    """
1660
    clustername = self.op.name
1661
    ip = self.ip
1662

    
1663
    # shutdown the master IP
1664
    master = self.cfg.GetMasterNode()
1665
    result = self.rpc.call_node_stop_master(master, False)
1666
    result.Raise("Could not disable the master role")
1667

    
1668
    try:
1669
      cluster = self.cfg.GetClusterInfo()
1670
      cluster.cluster_name = clustername
1671
      cluster.master_ip = ip
1672
      self.cfg.Update(cluster)
1673

    
1674
      # update the known hosts file
1675
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1676
      node_list = self.cfg.GetNodeList()
1677
      try:
1678
        node_list.remove(master)
1679
      except ValueError:
1680
        pass
1681
      result = self.rpc.call_upload_file(node_list,
1682
                                         constants.SSH_KNOWN_HOSTS_FILE)
1683
      for to_node, to_result in result.iteritems():
1684
        msg = to_result.fail_msg
1685
        if msg:
1686
          msg = ("Copy of file %s to node %s failed: %s" %
1687
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1688
          self.proc.LogWarning(msg)
1689

    
1690
    finally:
1691
      result = self.rpc.call_node_start_master(master, False, False)
1692
      msg = result.fail_msg
1693
      if msg:
1694
        self.LogWarning("Could not re-enable the master role on"
1695
                        " the master, please restart manually: %s", msg)
1696

    
1697

    
1698
def _RecursiveCheckIfLVMBased(disk):
1699
  """Check if the given disk or its children are lvm-based.
1700

1701
  @type disk: L{objects.Disk}
1702
  @param disk: the disk to check
1703
  @rtype: boolean
1704
  @return: boolean indicating whether an LD_LV dev_type was found or not
1705

1706
  """
1707
  if disk.children:
1708
    for chdisk in disk.children:
1709
      if _RecursiveCheckIfLVMBased(chdisk):
1710
        return True
1711
  return disk.dev_type == constants.LD_LV
1712

    
1713

    
1714
class LUSetClusterParams(LogicalUnit):
1715
  """Change the parameters of the cluster.
1716

1717
  """
1718
  HPATH = "cluster-modify"
1719
  HTYPE = constants.HTYPE_CLUSTER
1720
  _OP_REQP = []
1721
  REQ_BGL = False
1722

    
1723
  def CheckArguments(self):
1724
    """Check parameters
1725

1726
    """
1727
    if not hasattr(self.op, "candidate_pool_size"):
1728
      self.op.candidate_pool_size = None
1729
    if self.op.candidate_pool_size is not None:
1730
      try:
1731
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1732
      except (ValueError, TypeError), err:
1733
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1734
                                   str(err))
1735
      if self.op.candidate_pool_size < 1:
1736
        raise errors.OpPrereqError("At least one master candidate needed")
1737

    
1738
  def ExpandNames(self):
1739
    # FIXME: in the future maybe other cluster params won't require checking on
1740
    # all nodes to be modified.
1741
    self.needed_locks = {
1742
      locking.LEVEL_NODE: locking.ALL_SET,
1743
    }
1744
    self.share_locks[locking.LEVEL_NODE] = 1
1745

    
1746
  def BuildHooksEnv(self):
1747
    """Build hooks env.
1748

1749
    """
1750
    env = {
1751
      "OP_TARGET": self.cfg.GetClusterName(),
1752
      "NEW_VG_NAME": self.op.vg_name,
1753
      }
1754
    mn = self.cfg.GetMasterNode()
1755
    return env, [mn], [mn]
1756

    
1757
  def CheckPrereq(self):
1758
    """Check prerequisites.
1759

1760
    This checks whether the given params don't conflict and
1761
    if the given volume group is valid.
1762

1763
    """
1764
    if self.op.vg_name is not None and not self.op.vg_name:
1765
      instances = self.cfg.GetAllInstancesInfo().values()
1766
      for inst in instances:
1767
        for disk in inst.disks:
1768
          if _RecursiveCheckIfLVMBased(disk):
1769
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1770
                                       " lvm-based instances exist")
1771

    
1772
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1773

    
1774
    # if vg_name not None, checks given volume group on all nodes
1775
    if self.op.vg_name:
1776
      vglist = self.rpc.call_vg_list(node_list)
1777
      for node in node_list:
1778
        msg = vglist[node].fail_msg
1779
        if msg:
1780
          # ignoring down node
1781
          self.LogWarning("Error while gathering data on node %s"
1782
                          " (ignoring node): %s", node, msg)
1783
          continue
1784
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1785
                                              self.op.vg_name,
1786
                                              constants.MIN_VG_SIZE)
1787
        if vgstatus:
1788
          raise errors.OpPrereqError("Error on node '%s': %s" %
1789
                                     (node, vgstatus))
1790

    
1791
    self.cluster = cluster = self.cfg.GetClusterInfo()
1792
    # validate params changes
1793
    if self.op.beparams:
1794
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1795
      self.new_beparams = objects.FillDict(
1796
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1797

    
1798
    if self.op.nicparams:
1799
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1800
      self.new_nicparams = objects.FillDict(
1801
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1802
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1803

    
1804
    # hypervisor list/parameters
1805
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1806
    if self.op.hvparams:
1807
      if not isinstance(self.op.hvparams, dict):
1808
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1809
      for hv_name, hv_dict in self.op.hvparams.items():
1810
        if hv_name not in self.new_hvparams:
1811
          self.new_hvparams[hv_name] = hv_dict
1812
        else:
1813
          self.new_hvparams[hv_name].update(hv_dict)
1814

    
1815
    if self.op.enabled_hypervisors is not None:
1816
      self.hv_list = self.op.enabled_hypervisors
1817
      if not self.hv_list:
1818
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1819
                                   " least one member")
1820
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1821
      if invalid_hvs:
1822
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1823
                                   " entries: %s" % invalid_hvs)
1824
    else:
1825
      self.hv_list = cluster.enabled_hypervisors
1826

    
1827
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1828
      # either the enabled list has changed, or the parameters have, validate
1829
      for hv_name, hv_params in self.new_hvparams.items():
1830
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1831
            (self.op.enabled_hypervisors and
1832
             hv_name in self.op.enabled_hypervisors)):
1833
          # either this is a new hypervisor, or its parameters have changed
1834
          hv_class = hypervisor.GetHypervisor(hv_name)
1835
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1836
          hv_class.CheckParameterSyntax(hv_params)
1837
          _CheckHVParams(self, node_list, hv_name, hv_params)
1838

    
1839
  def Exec(self, feedback_fn):
1840
    """Change the parameters of the cluster.
1841

1842
    """
1843
    if self.op.vg_name is not None:
1844
      new_volume = self.op.vg_name
1845
      if not new_volume:
1846
        new_volume = None
1847
      if new_volume != self.cfg.GetVGName():
1848
        self.cfg.SetVGName(new_volume)
1849
      else:
1850
        feedback_fn("Cluster LVM configuration already in desired"
1851
                    " state, not changing")
1852
    if self.op.hvparams:
1853
      self.cluster.hvparams = self.new_hvparams
1854
    if self.op.enabled_hypervisors is not None:
1855
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1856
    if self.op.beparams:
1857
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1858
    if self.op.nicparams:
1859
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1860

    
1861
    if self.op.candidate_pool_size is not None:
1862
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1863
      # we need to update the pool size here, otherwise the save will fail
1864
      _AdjustCandidatePool(self)
1865

    
1866
    self.cfg.Update(self.cluster)
1867

    
1868

    
1869
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1870
  """Distribute additional files which are part of the cluster configuration.
1871

1872
  ConfigWriter takes care of distributing the config and ssconf files, but
1873
  there are more files which should be distributed to all nodes. This function
1874
  makes sure those are copied.
1875

1876
  @param lu: calling logical unit
1877
  @param additional_nodes: list of nodes not in the config to distribute to
1878

1879
  """
1880
  # 1. Gather target nodes
1881
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1882
  dist_nodes = lu.cfg.GetNodeList()
1883
  if additional_nodes is not None:
1884
    dist_nodes.extend(additional_nodes)
1885
  if myself.name in dist_nodes:
1886
    dist_nodes.remove(myself.name)
1887
  # 2. Gather files to distribute
1888
  dist_files = set([constants.ETC_HOSTS,
1889
                    constants.SSH_KNOWN_HOSTS_FILE,
1890
                    constants.RAPI_CERT_FILE,
1891
                    constants.RAPI_USERS_FILE,
1892
                    constants.HMAC_CLUSTER_KEY,
1893
                   ])
1894

    
1895
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1896
  for hv_name in enabled_hypervisors:
1897
    hv_class = hypervisor.GetHypervisor(hv_name)
1898
    dist_files.update(hv_class.GetAncillaryFiles())
1899

    
1900
  # 3. Perform the files upload
1901
  for fname in dist_files:
1902
    if os.path.exists(fname):
1903
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1904
      for to_node, to_result in result.items():
1905
        msg = to_result.fail_msg
1906
        if msg:
1907
          msg = ("Copy of file %s to node %s failed: %s" %
1908
                 (fname, to_node, msg))
1909
          lu.proc.LogWarning(msg)
1910

    
1911

    
1912
class LURedistributeConfig(NoHooksLU):
1913
  """Force the redistribution of cluster configuration.
1914

1915
  This is a very simple LU.
1916

1917
  """
1918
  _OP_REQP = []
1919
  REQ_BGL = False
1920

    
1921
  def ExpandNames(self):
1922
    self.needed_locks = {
1923
      locking.LEVEL_NODE: locking.ALL_SET,
1924
    }
1925
    self.share_locks[locking.LEVEL_NODE] = 1
1926

    
1927
  def CheckPrereq(self):
1928
    """Check prerequisites.
1929

1930
    """
1931

    
1932
  def Exec(self, feedback_fn):
1933
    """Redistribute the configuration.
1934

1935
    """
1936
    self.cfg.Update(self.cfg.GetClusterInfo())
1937
    _RedistributeAncillaryFiles(self)
1938

    
1939

    
1940
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1941
  """Sleep and poll for an instance's disk to sync.
1942

1943
  """
1944
  if not instance.disks:
1945
    return True
1946

    
1947
  if not oneshot:
1948
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1949

    
1950
  node = instance.primary_node
1951

    
1952
  for dev in instance.disks:
1953
    lu.cfg.SetDiskID(dev, node)
1954

    
1955
  retries = 0
1956
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1957
  while True:
1958
    max_time = 0
1959
    done = True
1960
    cumul_degraded = False
1961
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1962
    msg = rstats.fail_msg
1963
    if msg:
1964
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1965
      retries += 1
1966
      if retries >= 10:
1967
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1968
                                 " aborting." % node)
1969
      time.sleep(6)
1970
      continue
1971
    rstats = rstats.payload
1972
    retries = 0
1973
    for i, mstat in enumerate(rstats):
1974
      if mstat is None:
1975
        lu.LogWarning("Can't compute data for node %s/%s",
1976
                           node, instance.disks[i].iv_name)
1977
        continue
1978

    
1979
      cumul_degraded = (cumul_degraded or
1980
                        (mstat.is_degraded and mstat.sync_percent is None))
1981
      if mstat.sync_percent is not None:
1982
        done = False
1983
        if mstat.estimated_time is not None:
1984
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
1985
          max_time = mstat.estimated_time
1986
        else:
1987
          rem_time = "no time estimate"
1988
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1989
                        (instance.disks[i].iv_name, mstat.sync_percent, rem_time))
1990

    
1991
    # if we're done but degraded, let's do a few small retries, to
1992
    # make sure we see a stable and not transient situation; therefore
1993
    # we force restart of the loop
1994
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1995
      logging.info("Degraded disks found, %d retries left", degr_retries)
1996
      degr_retries -= 1
1997
      time.sleep(1)
1998
      continue
1999

    
2000
    if done or oneshot:
2001
      break
2002

    
2003
    time.sleep(min(60, max_time))
2004

    
2005
  if done:
2006
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2007
  return not cumul_degraded
2008

    
2009

    
2010
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2011
  """Check that mirrors are not degraded.
2012

2013
  The ldisk parameter, if True, will change the test from the
2014
  is_degraded attribute (which represents overall non-ok status for
2015
  the device(s)) to the ldisk (representing the local storage status).
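
  @rtype: boolean
  @return: True if the device (and all its children) looks healthy
      according to the selected criterion, False otherwise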
2016

2017
  """
2018
  lu.cfg.SetDiskID(dev, node)
2019

    
2020
  result = True
2021

    
2022
  if on_primary or dev.AssembleOnSecondary():
2023
    rstats = lu.rpc.call_blockdev_find(node, dev)
2024
    msg = rstats.fail_msg
2025
    if msg:
2026
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2027
      result = False
2028
    elif not rstats.payload:
2029
      lu.LogWarning("Can't find disk on node %s", node)
2030
      result = False
2031
    else:
2032
      if ldisk:
2033
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2034
      else:
2035
        result = result and not rstats.payload.is_degraded
2036

    
2037
  if dev.children:
2038
    for child in dev.children:
2039
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2040

    
2041
  return result
2042

    
2043

    
2044
class LUDiagnoseOS(NoHooksLU):
2045
  """Logical unit for OS diagnose/query.
2046

2047
  """
2048
  _OP_REQP = ["output_fields", "names"]
2049
  REQ_BGL = False
2050
  _FIELDS_STATIC = utils.FieldSet()
2051
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
2052

    
2053
  def ExpandNames(self):
2054
    if self.op.names:
2055
      raise errors.OpPrereqError("Selective OS query not supported")
2056

    
2057
    _CheckOutputFields(static=self._FIELDS_STATIC,
2058
                       dynamic=self._FIELDS_DYNAMIC,
2059
                       selected=self.op.output_fields)
2060

    
2061
    # Lock all nodes, in shared mode
2062
    # Temporary removal of locks, should be reverted later
2063
    # TODO: reintroduce locks when they are lighter-weight
2064
    self.needed_locks = {}
2065
    #self.share_locks[locking.LEVEL_NODE] = 1
2066
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2067

    
2068
  def CheckPrereq(self):
2069
    """Check prerequisites.
2070

2071
    """
2072

    
2073
  @staticmethod
2074
  def _DiagnoseByOS(node_list, rlist):
2075
    """Remaps a per-node return list into an a per-os per-node dictionary
2076

2077
    @param node_list: a list with the names of all nodes
2078
    @param rlist: a map with node names as keys and OS objects as values
2079

2080
    @rtype: dict
2081
    @return: a dictionary with osnames as keys and as value another map, with
2082
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2083

2084
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2085
                                     (/srv/..., False, "invalid api")],
2086
                           "node2": [(/srv/..., True, "")]}
2087
          }
2088

2089
    """
2090
    all_os = {}
2091
    # we build here the list of nodes that didn't fail the RPC (at RPC
2092
    # level), so that nodes with a non-responding node daemon don't
2093
    # make all OSes invalid
2094
    good_nodes = [node_name for node_name in rlist
2095
                  if not rlist[node_name].fail_msg]
2096
    for node_name, nr in rlist.items():
2097
      if nr.fail_msg or not nr.payload:
2098
        continue
2099
      for name, path, status, diagnose in nr.payload:
2100
        if name not in all_os:
2101
          # build a list of nodes for this os containing empty lists
2102
          # for each node in node_list
2103
          all_os[name] = {}
2104
          for nname in good_nodes:
2105
            all_os[name][nname] = []
2106
        all_os[name][node_name].append((path, status, diagnose))
2107
    return all_os
2108

    
2109
  def Exec(self, feedback_fn):
2110
    """Compute the list of OSes.
2111

2112
    """
2113
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2114
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2115
    pol = self._DiagnoseByOS(valid_nodes, node_data)
2116
    output = []
2117
    for os_name, os_data in pol.items():
2118
      row = []
2119
      for field in self.op.output_fields:
2120
        if field == "name":
2121
          val = os_name
2122
        elif field == "valid":
2123
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
2124
        elif field == "node_status":
2125
          # this is just a copy of the dict
2126
          val = {}
2127
          for node_name, nos_list in os_data.items():
2128
            val[node_name] = nos_list
2129
        else:
2130
          raise errors.ParameterError(field)
2131
        row.append(val)
2132
      output.append(row)
2133

    
2134
    return output
2135

    
2136

    
2137
class LURemoveNode(LogicalUnit):
2138
  """Logical unit for removing a node.
2139

2140
  """
2141
  HPATH = "node-remove"
2142
  HTYPE = constants.HTYPE_NODE
2143
  _OP_REQP = ["node_name"]
2144

    
2145
  def BuildHooksEnv(self):
2146
    """Build hooks env.
2147

2148
    This doesn't run on the target node in the pre phase as a failed
2149
    node would then be impossible to remove.
2150

2151
    """
2152
    env = {
2153
      "OP_TARGET": self.op.node_name,
2154
      "NODE_NAME": self.op.node_name,
2155
      }
2156
    all_nodes = self.cfg.GetNodeList()
2157
    all_nodes.remove(self.op.node_name)
2158
    return env, all_nodes, all_nodes
2159

    
2160
  def CheckPrereq(self):
2161
    """Check prerequisites.
2162

2163
    This checks:
2164
     - the node exists in the configuration
2165
     - it does not have primary or secondary instances
2166
     - it's not the master
2167

2168
    Any errors are signaled by raising errors.OpPrereqError.
2169

2170
    """
2171
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2172
    if node is None:
2173
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2174

    
2175
    instance_list = self.cfg.GetInstanceList()
2176

    
2177
    masternode = self.cfg.GetMasterNode()
2178
    if node.name == masternode:
2179
      raise errors.OpPrereqError("Node is the master node,"
2180
                                 " you need to failover first.")
2181

    
2182
    for instance_name in instance_list:
2183
      instance = self.cfg.GetInstanceInfo(instance_name)
2184
      if node.name in instance.all_nodes:
2185
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2186
                                   " please remove first." % instance_name)
2187
    self.op.node_name = node.name
2188
    self.node = node
2189

    
2190
  def Exec(self, feedback_fn):
2191
    """Removes the node from the cluster.
2192

2193
    """
2194
    node = self.node
2195
    logging.info("Stopping the node daemon and removing configs from node %s",
2196
                 node.name)
2197

    
2198
    self.context.RemoveNode(node.name)
2199

    
2200
    result = self.rpc.call_node_leave_cluster(node.name)
2201
    msg = result.fail_msg
2202
    if msg:
2203
      self.LogWarning("Errors encountered on the remote node while leaving"
2204
                      " the cluster: %s", msg)
2205

    
2206
    # Promote nodes to master candidate as needed
2207
    _AdjustCandidatePool(self)
2208

    
2209

    
2210
class LUQueryNodes(NoHooksLU):
2211
  """Logical unit for querying nodes.
2212

2213
  """
2214
  _OP_REQP = ["output_fields", "names", "use_locking"]
2215
  REQ_BGL = False
2216
  _FIELDS_DYNAMIC = utils.FieldSet(
2217
    "dtotal", "dfree",
2218
    "mtotal", "mnode", "mfree",
2219
    "bootid",
2220
    "ctotal", "cnodes", "csockets",
2221
    )
2222

    
2223
  _FIELDS_STATIC = utils.FieldSet(
2224
    "name", "pinst_cnt", "sinst_cnt",
2225
    "pinst_list", "sinst_list",
2226
    "pip", "sip", "tags",
2227
    "serial_no",
2228
    "master_candidate",
2229
    "master",
2230
    "offline",
2231
    "drained",
2232
    "role",
2233
    )
2234

    
2235
  def ExpandNames(self):
2236
    _CheckOutputFields(static=self._FIELDS_STATIC,
2237
                       dynamic=self._FIELDS_DYNAMIC,
2238
                       selected=self.op.output_fields)
2239

    
2240
    self.needed_locks = {}
2241
    self.share_locks[locking.LEVEL_NODE] = 1
2242

    
2243
    if self.op.names:
2244
      self.wanted = _GetWantedNodes(self, self.op.names)
2245
    else:
2246
      self.wanted = locking.ALL_SET
2247

    
2248
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2249
    self.do_locking = self.do_node_query and self.op.use_locking
2250
    if self.do_locking:
2251
      # if we don't request only static fields, we need to lock the nodes
2252
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2253

    
2254

    
2255
  def CheckPrereq(self):
2256
    """Check prerequisites.
2257

2258
    """
2259
    # The validation of the node list is done in the _GetWantedNodes,
2260
    # if non empty, and if empty, there's no validation to do
2261
    pass
2262

    
2263
  def Exec(self, feedback_fn):
2264
    """Computes the list of nodes and their attributes.
2265

2266
    """
2267
    all_info = self.cfg.GetAllNodesInfo()
2268
    if self.do_locking:
2269
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2270
    elif self.wanted != locking.ALL_SET:
2271
      nodenames = self.wanted
2272
      missing = set(nodenames).difference(all_info.keys())
2273
      if missing:
2274
        raise errors.OpExecError(
2275
          "Some nodes were removed before retrieving their data: %s" % missing)
2276
    else:
2277
      nodenames = all_info.keys()
2278

    
2279
    nodenames = utils.NiceSort(nodenames)
2280
    nodelist = [all_info[name] for name in nodenames]
2281

    
2282
    # begin data gathering
2283

    
2284
    if self.do_node_query:
2285
      live_data = {}
2286
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2287
                                          self.cfg.GetHypervisorType())
2288
      for name in nodenames:
2289
        nodeinfo = node_data[name]
2290
        if not nodeinfo.fail_msg and nodeinfo.payload:
2291
          nodeinfo = nodeinfo.payload
2292
          fn = utils.TryConvert
2293
          live_data[name] = {
2294
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2295
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2296
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2297
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2298
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2299
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2300
            "bootid": nodeinfo.get('bootid', None),
2301
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2302
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2303
            }
2304
        else:
2305
          live_data[name] = {}
2306
    else:
2307
      live_data = dict.fromkeys(nodenames, {})
2308

    
2309
    node_to_primary = dict([(name, set()) for name in nodenames])
2310
    node_to_secondary = dict([(name, set()) for name in nodenames])
2311

    
2312
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2313
                             "sinst_cnt", "sinst_list"))
2314
    if inst_fields & frozenset(self.op.output_fields):
2315
      instancelist = self.cfg.GetInstanceList()
2316

    
2317
      for instance_name in instancelist:
2318
        inst = self.cfg.GetInstanceInfo(instance_name)
2319
        if inst.primary_node in node_to_primary:
2320
          node_to_primary[inst.primary_node].add(inst.name)
2321
        for secnode in inst.secondary_nodes:
2322
          if secnode in node_to_secondary:
2323
            node_to_secondary[secnode].add(inst.name)
2324

    
2325
    master_node = self.cfg.GetMasterNode()
2326

    
2327
    # end data gathering
2328

    
2329
    output = []
2330
    for node in nodelist:
2331
      node_output = []
2332
      for field in self.op.output_fields:
2333
        if field == "name":
2334
          val = node.name
2335
        elif field == "pinst_list":
2336
          val = list(node_to_primary[node.name])
2337
        elif field == "sinst_list":
2338
          val = list(node_to_secondary[node.name])
2339
        elif field == "pinst_cnt":
2340
          val = len(node_to_primary[node.name])
2341
        elif field == "sinst_cnt":
2342
          val = len(node_to_secondary[node.name])
2343
        elif field == "pip":
2344
          val = node.primary_ip
2345
        elif field == "sip":
2346
          val = node.secondary_ip
2347
        elif field == "tags":
2348
          val = list(node.GetTags())
2349
        elif field == "serial_no":
2350
          val = node.serial_no
2351
        elif field == "master_candidate":
2352
          val = node.master_candidate
2353
        elif field == "master":
2354
          val = node.name == master_node
2355
        elif field == "offline":
2356
          val = node.offline
2357
        elif field == "drained":
2358
          val = node.drained
2359
        elif self._FIELDS_DYNAMIC.Matches(field):
2360
          val = live_data[node.name].get(field, None)
2361
        elif field == "role":
2362
          if node.name == master_node:
2363
            val = "M"
2364
          elif node.master_candidate:
2365
            val = "C"
2366
          elif node.drained:
2367
            val = "D"
2368
          elif node.offline:
2369
            val = "O"
2370
          else:
2371
            val = "R"
2372
        else:
2373
          raise errors.ParameterError(field)
2374
        node_output.append(val)
2375
      output.append(node_output)
2376

    
2377
    return output
2378

    
2379

    
2380
class LUQueryNodeVolumes(NoHooksLU):
2381
  """Logical unit for getting volumes on node(s).
2382

2383
  """
2384
  _OP_REQP = ["nodes", "output_fields"]
2385
  REQ_BGL = False
2386
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2387
  _FIELDS_STATIC = utils.FieldSet("node")
2388

    
2389
  def ExpandNames(self):
2390
    _CheckOutputFields(static=self._FIELDS_STATIC,
2391
                       dynamic=self._FIELDS_DYNAMIC,
2392
                       selected=self.op.output_fields)
2393

    
2394
    self.needed_locks = {}
2395
    self.share_locks[locking.LEVEL_NODE] = 1
2396
    if not self.op.nodes:
2397
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2398
    else:
2399
      self.needed_locks[locking.LEVEL_NODE] = \
2400
        _GetWantedNodes(self, self.op.nodes)
2401

    
2402
  def CheckPrereq(self):
2403
    """Check prerequisites.
2404

2405
    This checks that the fields required are valid output fields.
2406

2407
    """
2408
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2409

    
2410
  def Exec(self, feedback_fn):
2411
    """Computes the list of nodes and their attributes.
2412

2413
    """
2414
    nodenames = self.nodes
2415
    volumes = self.rpc.call_node_volumes(nodenames)
2416

    
2417
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2418
             in self.cfg.GetInstanceList()]
2419

    
2420
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2421

    
2422
    output = []
2423
    for node in nodenames:
2424
      nresult = volumes[node]
2425
      if nresult.offline:
2426
        continue
2427
      msg = nresult.fail_msg
2428
      if msg:
2429
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2430
        continue
2431

    
2432
      node_vols = nresult.payload[:]
2433
      node_vols.sort(key=lambda vol: vol['dev'])
2434

    
2435
      for vol in node_vols:
2436
        node_output = []
2437
        for field in self.op.output_fields:
2438
          if field == "node":
2439
            val = node
2440
          elif field == "phys":
2441
            val = vol['dev']
2442
          elif field == "vg":
2443
            val = vol['vg']
2444
          elif field == "name":
2445
            val = vol['name']
2446
          elif field == "size":
2447
            val = int(float(vol['size']))
2448
          elif field == "instance":
2449
            for inst in ilist:
2450
              if node not in lv_by_node[inst]:
2451
                continue
2452
              if vol['name'] in lv_by_node[inst][node]:
2453
                val = inst.name
2454
                break
2455
            else:
2456
              val = '-'
2457
          else:
2458
            raise errors.ParameterError(field)
2459
          node_output.append(str(val))
2460

    
2461
        output.append(node_output)
2462

    
2463
    return output
2464

    
2465

    
2466
class LUQueryNodeStorage(NoHooksLU):
2467
  """Logical unit for getting information on storage units on node(s).
2468

2469
  """
2470
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2471
  REQ_BGL = False
2472
  _FIELDS_STATIC = utils.FieldSet("node")
2473

    
2474
  def ExpandNames(self):
2475
    storage_type = self.op.storage_type
2476

    
2477
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2478
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2479

    
2480
    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]
2481

    
2482
    _CheckOutputFields(static=self._FIELDS_STATIC,
2483
                       dynamic=utils.FieldSet(*dynamic_fields),
2484
                       selected=self.op.output_fields)
2485

    
2486
    self.needed_locks = {}
2487
    self.share_locks[locking.LEVEL_NODE] = 1
2488

    
2489
    if self.op.nodes:
2490
      self.needed_locks[locking.LEVEL_NODE] = \
2491
        _GetWantedNodes(self, self.op.nodes)
2492
    else:
2493
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2494

    
2495
  def CheckPrereq(self):
2496
    """Check prerequisites.
2497

2498
    This checks that the fields required are valid output fields.
2499

2500
    """
2501
    self.op.name = getattr(self.op, "name", None)
2502

    
2503
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2504

    
2505
  def Exec(self, feedback_fn):
2506
    """Computes the list of nodes and their attributes.
2507

2508
    """
2509
    # Always get name to sort by
2510
    if constants.SF_NAME in self.op.output_fields:
2511
      fields = self.op.output_fields[:]
2512
    else:
2513
      fields = [constants.SF_NAME] + self.op.output_fields
2514

    
2515
    # Never ask for node as it's only known to the LU
2516
    while "node" in fields:
2517
      fields.remove("node")
2518

    
2519
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2520
    name_idx = field_idx[constants.SF_NAME]
2521

    
2522
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2523
    data = self.rpc.call_storage_list(self.nodes,
2524
                                      self.op.storage_type, st_args,
2525
                                      self.op.name, fields)
2526

    
2527
    result = []
2528

    
2529
    for node in utils.NiceSort(self.nodes):
2530
      nresult = data[node]
2531
      if nresult.offline:
2532
        continue
2533

    
2534
      msg = nresult.fail_msg
2535
      if msg:
2536
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2537
        continue
2538

    
2539
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2540

    
2541
      for name in utils.NiceSort(rows.keys()):
2542
        row = rows[name]
2543

    
2544
        out = []
2545

    
2546
        for field in self.op.output_fields:
2547
          if field == "node":
2548
            val = node
2549
          elif field in field_idx:
2550
            val = row[field_idx[field]]
2551
          else:
2552
            raise errors.ParameterError(field)
2553

    
2554
          out.append(val)
2555

    
2556
        result.append(out)
2557

    
2558
    return result
2559

    
2560

    
2561
class LUModifyNodeStorage(NoHooksLU):
2562
  """Logical unit for modifying a storage volume on a node.
2563

2564
  """
2565
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2566
  REQ_BGL = False
2567

    
2568
  def CheckArguments(self):
2569
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2570
    if node_name is None:
2571
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2572

    
2573
    self.op.node_name = node_name
2574

    
2575
    storage_type = self.op.storage_type
2576
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2577
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2578

    
2579
  def ExpandNames(self):
2580
    self.needed_locks = {
2581
      locking.LEVEL_NODE: self.op.node_name,
2582
      }
2583

    
2584
  def CheckPrereq(self):
2585
    """Check prerequisites.
2586

2587
    """
2588
    storage_type = self.op.storage_type
2589

    
2590
    try:
2591
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2592
    except KeyError:
2593
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2594
                                 " modified" % storage_type)
2595

    
2596
    diff = set(self.op.changes.keys()) - modifiable
2597
    if diff:
2598
      raise errors.OpPrereqError("The following fields can not be modified for"
2599
                                 " storage units of type '%s': %r" %
2600
                                 (storage_type, list(diff)))
2601

    
2602
  def Exec(self, feedback_fn):
2603
    """Computes the list of nodes and their attributes.
2604

2605
    """
2606
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2607
    result = self.rpc.call_storage_modify(self.op.node_name,
2608
                                          self.op.storage_type, st_args,
2609
                                          self.op.name, self.op.changes)
2610
    result.Raise("Failed to modify storage unit '%s' on %s" %
2611
                 (self.op.name, self.op.node_name))
2612

    
2613

    
2614
class LUAddNode(LogicalUnit):
2615
  """Logical unit for adding node to the cluster.
2616

2617
  """
2618
  HPATH = "node-add"
2619
  HTYPE = constants.HTYPE_NODE
2620
  _OP_REQP = ["node_name"]
2621

    
2622
  def BuildHooksEnv(self):
2623
    """Build hooks env.
2624

2625
    This will run on all nodes before, and on all nodes + the new node after.
2626

2627
    """
2628
    env = {
2629
      "OP_TARGET": self.op.node_name,
2630
      "NODE_NAME": self.op.node_name,
2631
      "NODE_PIP": self.op.primary_ip,
2632
      "NODE_SIP": self.op.secondary_ip,
2633
      }
2634
    nodes_0 = self.cfg.GetNodeList()
2635
    nodes_1 = nodes_0 + [self.op.node_name, ]
2636
    return env, nodes_0, nodes_1
2637

    
2638
  def CheckPrereq(self):
2639
    """Check prerequisites.
2640

2641
    This checks:
2642
     - the new node is not already in the config
2643
     - it is resolvable
2644
     - its parameters (single/dual homed) matches the cluster
2645

2646
    Any errors are signaled by raising errors.OpPrereqError.
2647

2648
    """
2649
    node_name = self.op.node_name
2650
    cfg = self.cfg
2651

    
2652
    dns_data = utils.HostInfo(node_name)
2653

    
2654
    node = dns_data.name
2655
    primary_ip = self.op.primary_ip = dns_data.ip
2656
    secondary_ip = getattr(self.op, "secondary_ip", None)
2657
    if secondary_ip is None:
2658
      secondary_ip = primary_ip
2659
    if not utils.IsValidIP(secondary_ip):
2660
      raise errors.OpPrereqError("Invalid secondary IP given")
2661
    self.op.secondary_ip = secondary_ip
2662

    
2663
    node_list = cfg.GetNodeList()
2664
    if not self.op.readd and node in node_list:
2665
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2666
                                 node)
2667
    elif self.op.readd and node not in node_list:
2668
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2669

    
2670
    for existing_node_name in node_list:
2671
      existing_node = cfg.GetNodeInfo(existing_node_name)
2672

    
2673
      if self.op.readd and node == existing_node_name:
2674
        if (existing_node.primary_ip != primary_ip or
2675
            existing_node.secondary_ip != secondary_ip):
2676
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2677
                                     " address configuration as before")
2678
        continue
2679

    
2680
      if (existing_node.primary_ip == primary_ip or
2681
          existing_node.secondary_ip == primary_ip or
2682
          existing_node.primary_ip == secondary_ip or
2683
          existing_node.secondary_ip == secondary_ip):
2684
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2685
                                   " existing node %s" % existing_node.name)
2686

    
2687
    # check that the type of the node (single versus dual homed) is the
2688
    # same as for the master
2689
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2690
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2691
    newbie_singlehomed = secondary_ip == primary_ip
2692
    if master_singlehomed != newbie_singlehomed:
2693
      if master_singlehomed:
2694
        raise errors.OpPrereqError("The master has no private ip but the"
2695
                                   " new node has one")
2696
      else:
2697
        raise errors.OpPrereqError("The master has a private ip but the"
2698
                                   " new node doesn't have one")
2699

    
2700
    # checks reachability
2701
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2702
      raise errors.OpPrereqError("Node not reachable by ping")
2703

    
2704
    if not newbie_singlehomed:
2705
      # check reachability from my secondary ip to newbie's secondary ip
2706
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2707
                           source=myself.secondary_ip):
2708
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2709
                                   " based ping to noded port")
2710

    
2711
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2712
    if self.op.readd:
2713
      exceptions = [node]
2714
    else:
2715
      exceptions = []
2716
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2717
    # the new node will increase mc_max with one, so:
2718
    mc_max = min(mc_max + 1, cp_size)
2719
    self.master_candidate = mc_now < mc_max
2720

    
2721
    if self.op.readd:
2722
      self.new_node = self.cfg.GetNodeInfo(node)
2723
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2724
    else:
2725
      self.new_node = objects.Node(name=node,
2726
                                   primary_ip=primary_ip,
2727
                                   secondary_ip=secondary_ip,
2728
                                   master_candidate=self.master_candidate,
2729
                                   offline=False, drained=False)
2730

    
2731
  def Exec(self, feedback_fn):
2732
    """Adds the new node to the cluster.
2733

2734
    """
2735
    new_node = self.new_node
2736
    node = new_node.name
2737

    
2738
    # for re-adds, reset the offline/drained/master-candidate flags;
2739
    # we need to reset here, otherwise offline would prevent RPC calls
2740
    # later in the procedure; this also means that if the re-add
2741
    # fails, we are left with a non-offlined, broken node
2742
    if self.op.readd:
2743
      new_node.drained = new_node.offline = False
2744
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2745
      # if we demote the node, we do cleanup later in the procedure
2746
      new_node.master_candidate = self.master_candidate
2747

    
2748
    # notify the user about any possible mc promotion
2749
    if new_node.master_candidate:
2750
      self.LogInfo("Node will be a master candidate")
2751

    
2752
    # check connectivity
2753
    result = self.rpc.call_version([node])[node]
2754
    result.Raise("Can't get version information from node %s" % node)
2755
    if constants.PROTOCOL_VERSION == result.payload:
2756
      logging.info("Communication to node %s fine, sw version %s match",
2757
                   node, result.payload)
2758
    else:
2759
      raise errors.OpExecError("Version mismatch master version %s,"
2760
                               " node version %s" %
2761
                               (constants.PROTOCOL_VERSION, result.payload))
2762

    
2763
    # setup ssh on node
2764
    logging.info("Copy ssh key to node %s", node)
2765
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2766
    keyarray = []
2767
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2768
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2769
                priv_key, pub_key]
2770

    
2771
    for i in keyfiles:
2772
      f = open(i, 'r')
2773
      try:
2774
        keyarray.append(f.read())
2775
      finally:
2776
        f.close()
2777

    
2778
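    # the six key arguments follow the order of keyfiles above: host DSA
    # private/public, host RSA private/public, and the Ganeti user's
    # private/public SSH key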
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2779
                                    keyarray[2],
2780
                                    keyarray[3], keyarray[4], keyarray[5])
2781
    result.Raise("Cannot transfer ssh keys to the new node")
2782

    
2783
    # Add node to our /etc/hosts, and add key to known_hosts
2784
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2785
      utils.AddHostToEtcHosts(new_node.name)
2786

    
2787
    if new_node.secondary_ip != new_node.primary_ip:
2788
      result = self.rpc.call_node_has_ip_address(new_node.name,
2789
                                                 new_node.secondary_ip)
2790
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2791
                   prereq=True)
2792
      if not result.payload:
2793
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2794
                                 " you gave (%s). Please fix and re-run this"
2795
                                 " command." % new_node.secondary_ip)
2796

    
2797
    node_verify_list = [self.cfg.GetMasterNode()]
2798
    node_verify_param = {
2799
      'nodelist': [node],
2800
      # TODO: do a node-net-test as well?
2801
    }
2802

    
2803
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2804
                                       self.cfg.GetClusterName())
2805
    for verifier in node_verify_list:
2806
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2807
      nl_payload = result[verifier].payload['nodelist']
2808
      if nl_payload:
2809
        for failed in nl_payload:
2810
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2811
                      (verifier, nl_payload[failed]))
2812
        raise errors.OpExecError("ssh/hostname verification failed.")
2813

    
2814
    if self.op.readd:
2815
      _RedistributeAncillaryFiles(self)
2816
      self.context.ReaddNode(new_node)
2817
      # make sure we redistribute the config
2818
      self.cfg.Update(new_node)
2819
      # and make sure the new node will not have old files around
2820
      if not new_node.master_candidate:
2821
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2822
        msg = result.fail_msg
2823
        if msg:
2824
          self.LogWarning("Node failed to demote itself from master"
2825
                          " candidate status: %s" % msg)
2826
    else:
2827
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2828
      self.context.AddNode(new_node)
2829

    
2830

    
2831
class LUSetNodeParams(LogicalUnit):
2832
  """Modifies the parameters of a node.
2833

2834
  """
2835
  HPATH = "node-modify"
2836
  HTYPE = constants.HTYPE_NODE
2837
  _OP_REQP = ["node_name"]
2838
  REQ_BGL = False
2839

    
2840
  def CheckArguments(self):
2841
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2842
    if node_name is None:
2843
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2844
    self.op.node_name = node_name
2845
    _CheckBooleanOpField(self.op, 'master_candidate')
2846
    _CheckBooleanOpField(self.op, 'offline')
2847
    _CheckBooleanOpField(self.op, 'drained')
2848
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2849
    if all_mods.count(None) == 3:
2850
      raise errors.OpPrereqError("Please pass at least one modification")
2851
    if all_mods.count(True) > 1:
2852
      raise errors.OpPrereqError("Can't set the node into more than one"
2853
                                 " state at the same time")
2854

    
2855
  def ExpandNames(self):
2856
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2857

    
2858
  def BuildHooksEnv(self):
2859
    """Build hooks env.
2860

2861
    This runs on the master node.
2862

2863
    """
2864
    env = {
2865
      "OP_TARGET": self.op.node_name,
2866
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2867
      "OFFLINE": str(self.op.offline),
2868
      "DRAINED": str(self.op.drained),
2869
      }
2870
    nl = [self.cfg.GetMasterNode(),
2871
          self.op.node_name]
2872
    return env, nl, nl
2873

    
2874
  def CheckPrereq(self):
2875
    """Check prerequisites.
2876

2877
    This checks that the requested changes are consistent with the node's
    current state and the cluster's master candidate pool requirements.
2878

2879
    """
2880
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2881

    
2882
    if ((self.op.master_candidate == False or self.op.offline == True or
2883
         self.op.drained == True) and node.master_candidate):
2884
      # we will demote the node from master_candidate
2885
      if self.op.node_name == self.cfg.GetMasterNode():
2886
        raise errors.OpPrereqError("The master node has to be a"
2887
                                   " master candidate, online and not drained")
2888
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2889
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2890
      if num_candidates <= cp_size:
2891
        msg = ("Not enough master candidates (desired"
2892
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2893
        if self.op.force:
2894
          self.LogWarning(msg)
2895
        else:
2896
          raise errors.OpPrereqError(msg)
2897

    
2898
    if (self.op.master_candidate == True and
2899
        ((node.offline and not self.op.offline == False) or
2900
         (node.drained and not self.op.drained == False))):
2901
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2902
                                 " to master_candidate" % node.name)
2903

    
2904
    return
2905

    
2906
  def Exec(self, feedback_fn):
2907
    """Modifies a node.
2908

2909
    """
2910
    node = self.node
2911

    
2912
    result = []
2913
    changed_mc = False
2914

    
2915
    if self.op.offline is not None:
2916
      node.offline = self.op.offline
2917
      result.append(("offline", str(self.op.offline)))
2918
      if self.op.offline == True:
2919
        if node.master_candidate:
2920
          node.master_candidate = False
2921
          changed_mc = True
2922
          result.append(("master_candidate", "auto-demotion due to offline"))
2923
        if node.drained:
2924
          node.drained = False
2925
          result.append(("drained", "clear drained status due to offline"))
2926

    
2927
    if self.op.master_candidate is not None:
2928
      node.master_candidate = self.op.master_candidate
2929
      changed_mc = True
2930
      result.append(("master_candidate", str(self.op.master_candidate)))
2931
      if self.op.master_candidate == False:
2932
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2933
        msg = rrc.fail_msg
2934
        if msg:
2935
          self.LogWarning("Node failed to demote itself: %s" % msg)
2936

    
2937
    if self.op.drained is not None:
2938
      node.drained = self.op.drained
2939
      result.append(("drained", str(self.op.drained)))
2940
      if self.op.drained == True:
2941
        if node.master_candidate:
2942
          node.master_candidate = False
2943
          changed_mc = True
2944
          result.append(("master_candidate", "auto-demotion due to drain"))
2945
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2946
          msg = rrc.fail_msg
2947
          if msg:
2948
            self.LogWarning("Node failed to demote itself: %s" % msg)
2949
        if node.offline:
2950
          node.offline = False
2951
          result.append(("offline", "clear offline status due to drain"))
2952

    
2953
    # this will trigger configuration file update, if needed
2954
    self.cfg.Update(node)
2955
    # this will trigger job queue propagation or cleanup
2956
    if changed_mc:
2957
      self.context.ReaddNode(node)
2958

    
2959
    return result


class LUPowercycleNode(NoHooksLU):
2963
  """Powercycles a node.
2964

2965
  """
2966
  _OP_REQP = ["node_name", "force"]
2967
  REQ_BGL = False
2968

    
2969
  def CheckArguments(self):
2970
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2971
    if node_name is None:
2972
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2973
    self.op.node_name = node_name
2974
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2975
      raise errors.OpPrereqError("The node is the master and the force"
2976
                                 " parameter was not set")
2977

    
2978
  def ExpandNames(self):
2979
    """Locking for PowercycleNode.
2980

2981
    This is a last-resort option and shouldn't block on other
2982
    jobs. Therefore, we grab no locks.
2983

2984
    """
2985
    self.needed_locks = {}
2986

    
2987
  def CheckPrereq(self):
2988
    """Check prerequisites.
2989

2990
    This LU has no prereqs.
2991

2992
    """
2993
    pass
2994

    
2995
  def Exec(self, feedback_fn):
2996
    """Reboots a node.
2997

2998
    """
2999
    result = self.rpc.call_node_powercycle(self.op.node_name,
3000
                                           self.cfg.GetHypervisorType())
3001
    result.Raise("Failed to schedule the reboot")
3002
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
3006
  """Query cluster configuration.
3007

3008
  """
3009
  _OP_REQP = []
3010
  REQ_BGL = False
3011

    
3012
  def ExpandNames(self):
3013
    self.needed_locks = {}
3014

    
3015
  def CheckPrereq(self):
3016
    """No prerequsites needed for this LU.
3017

3018
    """
3019
    pass
3020

    
3021
  def Exec(self, feedback_fn):
3022
    """Return cluster config.
3023

3024
    """
3025
    cluster = self.cfg.GetClusterInfo()
3026
    result = {
3027
      "software_version": constants.RELEASE_VERSION,
3028
      "protocol_version": constants.PROTOCOL_VERSION,
3029
      "config_version": constants.CONFIG_VERSION,
3030
      "os_api_version": max(constants.OS_API_VERSIONS),
3031
      "export_version": constants.EXPORT_VERSION,
3032
      "architecture": (platform.architecture()[0], platform.machine()),
3033
      "name": cluster.cluster_name,
3034
      "master": cluster.master_node,
3035
      "default_hypervisor": cluster.enabled_hypervisors[0],
3036
      "enabled_hypervisors": cluster.enabled_hypervisors,
3037
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3038
                        for hypervisor_name in cluster.enabled_hypervisors]),
3039
      "beparams": cluster.beparams,
3040
      "nicparams": cluster.nicparams,
3041
      "candidate_pool_size": cluster.candidate_pool_size,
3042
      "master_netdev": cluster.master_netdev,
3043
      "volume_group_name": cluster.volume_group_name,
3044
      "file_storage_dir": cluster.file_storage_dir,
3045
      }
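    # Illustrative example of the reply shape (values are made up):
    #   {"name": "cluster.example.com", "master": "node1.example.com",
    #    "candidate_pool_size": 10, "enabled_hypervisors": ["xen-pvm"], ...}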
3046

    
3047
    return result


class LUQueryConfigValues(NoHooksLU):
3051
  """Return configuration values.
3052

3053
  """
3054
  _OP_REQP = []
3055
  REQ_BGL = False
3056
  _FIELDS_DYNAMIC = utils.FieldSet()
3057
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
3058

    
3059
  def ExpandNames(self):
3060
    self.needed_locks = {}
3061

    
3062
    _CheckOutputFields(static=self._FIELDS_STATIC,
3063
                       dynamic=self._FIELDS_DYNAMIC,
3064
                       selected=self.op.output_fields)
3065

    
3066
  def CheckPrereq(self):
3067
    """No prerequisites.
3068

3069
    """
3070
    pass
3071

    
3072
  def Exec(self, feedback_fn):
3073
    """Dump a representation of the cluster config to the standard output.
3074

3075
    """
3076
    values = []
3077
    for field in self.op.output_fields:
3078
      if field == "cluster_name":
3079
        entry = self.cfg.GetClusterName()
3080
      elif field == "master_node":
3081
        entry = self.cfg.GetMasterNode()
3082
      elif field == "drain_flag":
3083
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3084
      else:
3085
        raise errors.ParameterError(field)
3086
      values.append(entry)
3087
    return values


class LUActivateInstanceDisks(NoHooksLU):
3091
  """Bring up an instance's disks.
3092

3093
  """
3094
  _OP_REQP = ["instance_name"]
3095
  REQ_BGL = False
3096

    
3097
  def ExpandNames(self):
3098
    self._ExpandAndLockInstance()
3099
    self.needed_locks[locking.LEVEL_NODE] = []
3100
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3101

    
3102
  def DeclareLocks(self, level):
3103
    if level == locking.LEVEL_NODE:
3104
      self._LockInstancesNodes()
3105

    
3106
  def CheckPrereq(self):
3107
    """Check prerequisites.
3108

3109
    This checks that the instance is in the cluster.
3110

3111
    """
3112
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3113
    assert self.instance is not None, \
3114
      "Cannot retrieve locked instance %s" % self.op.instance_name
3115
    _CheckNodeOnline(self, self.instance.primary_node)
3116
    if not hasattr(self.op, "ignore_size"):
3117
      self.op.ignore_size = False
3118

    
3119
  def Exec(self, feedback_fn):
3120
    """Activate the disks.
3121

3122
    """
3123
    disks_ok, disks_info = \
3124
              _AssembleInstanceDisks(self, self.instance,
3125
                                     ignore_size=self.op.ignore_size)
3126
    if not disks_ok:
3127
      raise errors.OpExecError("Cannot activate block devices")
3128

    
3129
    return disks_info


def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3133
                           ignore_size=False):
3134
  """Prepare the block devices for an instance.
3135

3136
  This sets up the block devices on all nodes.
3137

3138
  @type lu: L{LogicalUnit}
3139
  @param lu: the logical unit on whose behalf we execute
3140
  @type instance: L{objects.Instance}
3141
  @param instance: the instance for whose disks we assemble
3142
  @type ignore_secondaries: boolean
3143
  @param ignore_secondaries: if true, errors on secondary nodes
3144
      won't result in an error return from the function
3145
  @type ignore_size: boolean
3146
  @param ignore_size: if true, the current known size of the disk
3147
      will not be used during the disk activation, useful for cases
3148
      when the size is wrong
3149
  @return: a tuple of (disks_ok, device_info), where device_info is a
      list of (host, instance_visible_name, node_visible_name)
      tuples with the mapping from node devices to instance devices
3152

3153
  """
3154
  device_info = []
3155
  disks_ok = True
3156
  iname = instance.name
3157
  # With the two-pass mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before the handshake has occurred, but we do not eliminate it
3160

    
3161
  # The proper fix would be to wait (with some limits) until the
3162
  # connection has been made and drbd transitions from WFConnection
3163
  # into any other network-connected state (Connected, SyncTarget,
3164
  # SyncSource, etc.)
3165

    
3166
  # 1st pass, assemble on all nodes in secondary mode
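  # (in the call below the trailing False is the "assemble as primary"
  #  flag -- compare the is_primary=False/pass=1 and is_primary=True/pass=2
  #  markers in the warning messages of the two passes)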
3167
  for inst_disk in instance.disks:
3168
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3169
      if ignore_size:
3170
        node_disk = node_disk.Copy()
3171
        node_disk.UnsetSize()
3172
      lu.cfg.SetDiskID(node_disk, node)
3173
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3174
      msg = result.fail_msg
3175
      if msg:
3176
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3177
                           " (is_primary=False, pass=1): %s",
3178
                           inst_disk.iv_name, node, msg)
3179
        if not ignore_secondaries:
3180
          disks_ok = False
3181

    
3182
  # FIXME: race condition on drbd migration to primary
3183

    
3184
  # 2nd pass, do only the primary node
3185
  for inst_disk in instance.disks:
3186
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3187
      if node != instance.primary_node:
3188
        continue
3189
      if ignore_size:
3190
        node_disk = node_disk.Copy()
3191
        node_disk.UnsetSize()
3192
      lu.cfg.SetDiskID(node_disk, node)
3193
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3194
      msg = result.fail_msg
3195
      if msg:
3196
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3197
                           " (is_primary=True, pass=2): %s",
3198
                           inst_disk.iv_name, node, msg)
3199
        disks_ok = False
3200
    device_info.append((instance.primary_node, inst_disk.iv_name,
3201
                        result.payload))
3202

    
3203
  # leave the disks configured for the primary node
3204
  # this is a workaround that would be fixed better by
3205
  # improving the logical/physical id handling
3206
  for disk in instance.disks:
3207
    lu.cfg.SetDiskID(disk, instance.primary_node)
3208

    
3209
  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
3213
  """Start the disks of an instance.
3214

3215
  """
3216
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3217
                                           ignore_secondaries=force)
3218
  if not disks_ok:
3219
    _ShutdownInstanceDisks(lu, instance)
3220
    if force is not None and not force:
3221
      lu.proc.LogWarning("", hint="If the message above refers to a"
3222
                         " secondary node,"
3223
                         " you can retry the operation using '--force'.")
3224
    raise errors.OpExecError("Disk consistency error")
3225

    
3226

    
3227
class LUDeactivateInstanceDisks(NoHooksLU):
3228
  """Shutdown an instance's disks.
3229

3230
  """
3231
  _OP_REQP = ["instance_name"]
3232
  REQ_BGL = False
3233

    
3234
  def ExpandNames(self):
3235
    self._ExpandAndLockInstance()
3236
    self.needed_locks[locking.LEVEL_NODE] = []
3237
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3238

    
3239
  def DeclareLocks(self, level):
3240
    if level == locking.LEVEL_NODE:
3241
      self._LockInstancesNodes()
3242

    
3243
  def CheckPrereq(self):
3244
    """Check prerequisites.
3245

3246
    This checks that the instance is in the cluster.
3247

3248
    """
3249
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3250
    assert self.instance is not None, \
3251
      "Cannot retrieve locked instance %s" % self.op.instance_name
3252

    
3253
  def Exec(self, feedback_fn):
3254
    """Deactivate the disks
3255

3256
    """
3257
    instance = self.instance
3258
    _SafeShutdownInstanceDisks(self, instance)
3259

    
3260

    
3261
def _SafeShutdownInstanceDisks(lu, instance):
3262
  """Shutdown block devices of an instance.
3263

3264
  This function checks if an instance is running, before calling
3265
  _ShutdownInstanceDisks.
3266

3267
  """
3268
  pnode = instance.primary_node
3269
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3270
  ins_l.Raise("Can't contact node %s" % pnode)
3271

    
3272
  if instance.name in ins_l.payload:
3273
    raise errors.OpExecError("Instance is running, can't shutdown"
3274
                             " block devices.")
3275

    
3276
  _ShutdownInstanceDisks(lu, instance)
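  # Rationale: shutting down the block devices underneath a live instance
  # would risk data corruption, so the primary node's instance list is
  # consulted first and the operation is refused if the instance shows up.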
3277

    
3278

    
3279
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3280
  """Shutdown block devices of an instance.
3281

3282
  This does the shutdown on all nodes of the instance.
3283

3284
  If ignore_primary is true, errors on the primary node are
  ignored.
3286

3287
  """
3288
  all_result = True
3289
  for disk in instance.disks:
3290
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3291
      lu.cfg.SetDiskID(top_disk, node)
3292
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3293
      msg = result.fail_msg
3294
      if msg:
3295
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3296
                      disk.iv_name, node, msg)
3297
        if not ignore_primary or node != instance.primary_node:
3298
          all_result = False
3299
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3303
  """Checks if a node has enough free memory.
3304

3305
  This function checks if a given node has the needed amount of free
  memory. If the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
3309

3310
  @type lu: C{LogicalUnit}
3311
  @param lu: a logical unit from which we get configuration data
3312
  @type node: C{str}
3313
  @param node: the node to check
3314
  @type reason: C{str}
3315
  @param reason: string to use in the error message
3316
  @type requested: C{int}
3317
  @param requested: the amount of memory in MiB to check for
3318
  @type hypervisor_name: C{str}
3319
  @param hypervisor_name: the hypervisor to ask for memory stats
3320
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3321
      we cannot check the node
3322

3323
  """
3324
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3325
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3326
  free_mem = nodeinfo[node].payload.get('memory_free', None)
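  # 'memory_free' comes from the node_info RPC payload; it may be missing
  # or of an unexpected type if the node answered but could not compute
  # it, hence the isinstance() check below rather than a plain None test.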
3327
  if not isinstance(free_mem, int):
3328
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3329
                               " was '%s'" % (node, free_mem))
3330
  if requested > free_mem:
3331
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3332
                               " needed %s MiB, available %s MiB" %
3333
                               (node, reason, requested, free_mem))


class LUStartupInstance(LogicalUnit):
3337
  """Starts an instance.
3338

3339
  """
3340
  HPATH = "instance-start"
3341
  HTYPE = constants.HTYPE_INSTANCE
3342
  _OP_REQP = ["instance_name", "force"]
3343
  REQ_BGL = False
3344

    
3345
  def ExpandNames(self):
3346
    self._ExpandAndLockInstance()
3347

    
3348
  def BuildHooksEnv(self):
3349
    """Build hooks env.
3350

3351
    This runs on master, primary and secondary nodes of the instance.
3352

3353
    """
3354
    env = {
3355
      "FORCE": self.op.force,
3356
      }
3357
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3358
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3359
    return env, nl, nl
3360

    
3361
  def CheckPrereq(self):
3362
    """Check prerequisites.
3363

3364
    This checks that the instance is in the cluster.
3365

3366
    """
3367
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3368
    assert self.instance is not None, \
3369
      "Cannot retrieve locked instance %s" % self.op.instance_name
3370

    
3371
    # extra beparams
3372
    self.beparams = getattr(self.op, "beparams", {})
3373
    if self.beparams:
3374
      if not isinstance(self.beparams, dict):
3375
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3376
                                   " dict" % (type(self.beparams), ))
3377
      # fill the beparams dict
3378
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3379
      self.op.beparams = self.beparams
3380

    
3381
    # extra hvparams
3382
    self.hvparams = getattr(self.op, "hvparams", {})
3383
    if self.hvparams:
3384
      if not isinstance(self.hvparams, dict):
3385
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3386
                                   " dict" % (type(self.hvparams), ))
3387

    
3388
      # check hypervisor parameter syntax (locally)
3389
      cluster = self.cfg.GetClusterInfo()
3390
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3391
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3392
                                    instance.hvparams)
3393
      filled_hvp.update(self.hvparams)
3394
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3395
      hv_type.CheckParameterSyntax(filled_hvp)
3396
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3397
      self.op.hvparams = self.hvparams
3398

    
3399
    _CheckNodeOnline(self, instance.primary_node)
3400

    
3401
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3402
    # check bridges existence
3403
    _CheckInstanceBridgesExist(self, instance)
3404

    
3405
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3406
                                              instance.name,
3407
                                              instance.hypervisor)
3408
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3409
                      prereq=True)
3410
    if not remote_info.payload: # not running already
3411
      _CheckNodeFreeMemory(self, instance.primary_node,
3412
                           "starting instance %s" % instance.name,
3413
                           bep[constants.BE_MEMORY], instance.hypervisor)
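  # The hvparams/beparams accepted above are validated against the cluster
  # and instance defaults but never written back to the configuration;
  # Exec only hands them to call_instance_start, so they act as one-shot
  # overrides for this particular start.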
3414

    
3415
  def Exec(self, feedback_fn):
3416
    """Start the instance.
3417

3418
    """
3419
    instance = self.instance
3420
    force = self.op.force
3421

    
3422
    self.cfg.MarkInstanceUp(instance.name)
3423

    
3424
    node_current = instance.primary_node
3425

    
3426
    _StartInstanceDisks(self, instance, force)
3427

    
3428
    result = self.rpc.call_instance_start(node_current, instance,
3429
                                          self.hvparams, self.beparams)
3430
    msg = result.fail_msg
3431
    if msg:
3432
      _ShutdownInstanceDisks(self, instance)
3433
      raise errors.OpExecError("Could not start instance: %s" % msg)
3434

    
3435

    
3436
class LURebootInstance(LogicalUnit):
3437
  """Reboot an instance.
3438

3439
  """
3440
  HPATH = "instance-reboot"
3441
  HTYPE = constants.HTYPE_INSTANCE
3442
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3443
  REQ_BGL = False
3444

    
3445
  def ExpandNames(self):
3446
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3447
                                   constants.INSTANCE_REBOOT_HARD,
3448
                                   constants.INSTANCE_REBOOT_FULL]:
3449
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3450
                                  (constants.INSTANCE_REBOOT_SOFT,
3451
                                   constants.INSTANCE_REBOOT_HARD,
3452
                                   constants.INSTANCE_REBOOT_FULL))
3453
    self._ExpandAndLockInstance()
3454

    
3455
  def BuildHooksEnv(self):
3456
    """Build hooks env.
3457

3458
    This runs on master, primary and secondary nodes of the instance.
3459

3460
    """
3461
    env = {
3462
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3463
      "REBOOT_TYPE": self.op.reboot_type,
3464
      }
3465
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3466
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3467
    return env, nl, nl
3468

    
3469
  def CheckPrereq(self):
3470
    """Check prerequisites.
3471

3472
    This checks that the instance is in the cluster.
3473

3474
    """
3475
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3476
    assert self.instance is not None, \
3477
      "Cannot retrieve locked instance %s" % self.op.instance_name
3478

    
3479
    _CheckNodeOnline(self, instance.primary_node)
3480

    
3481
    # check bridges existence
3482
    _CheckInstanceBridgesExist(self, instance)
3483

    
3484
  def Exec(self, feedback_fn):
3485
    """Reboot the instance.
3486

3487
    """
3488
    instance = self.instance
3489
    ignore_secondaries = self.op.ignore_secondaries
3490
    reboot_type = self.op.reboot_type
3491

    
3492
    node_current = instance.primary_node
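    # Soft and hard reboots are delegated to the node via
    # call_instance_reboot; a full reboot is emulated below by a clean
    # shutdown followed by a disk restart and a fresh instance start.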
3493

    
3494
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3495
                       constants.INSTANCE_REBOOT_HARD]:
3496
      for disk in instance.disks:
3497
        self.cfg.SetDiskID(disk, node_current)
3498
      result = self.rpc.call_instance_reboot(node_current, instance,
3499
                                             reboot_type)
3500
      result.Raise("Could not reboot instance")
3501
    else:
3502
      result = self.rpc.call_instance_shutdown(node_current, instance)
3503
      result.Raise("Could not shutdown instance for full reboot")
3504
      _ShutdownInstanceDisks(self, instance)
3505
      _StartInstanceDisks(self, instance, ignore_secondaries)
3506
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3507
      msg = result.fail_msg
3508
      if msg:
3509
        _ShutdownInstanceDisks(self, instance)
3510
        raise errors.OpExecError("Could not start instance for"
3511
                                 " full reboot: %s" % msg)
3512

    
3513
    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
3517
  """Shutdown an instance.
3518

3519
  """
3520
  HPATH = "instance-stop"
3521
  HTYPE = constants.HTYPE_INSTANCE
3522
  _OP_REQP = ["instance_name"]
3523
  REQ_BGL = False
3524

    
3525
  def ExpandNames(self):
3526
    self._ExpandAndLockInstance()
3527

    
3528
  def BuildHooksEnv(self):
3529
    """Build hooks env.
3530

3531
    This runs on master, primary and secondary nodes of the instance.
3532

3533
    """
3534
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3535
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3536
    return env, nl, nl
3537

    
3538
  def CheckPrereq(self):
3539
    """Check prerequisites.
3540

3541
    This checks that the instance is in the cluster.
3542

3543
    """
3544
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3545
    assert self.instance is not None, \
3546
      "Cannot retrieve locked instance %s" % self.op.instance_name
3547
    _CheckNodeOnline(self, self.instance.primary_node)
3548

    
3549
  def Exec(self, feedback_fn):
3550
    """Shutdown the instance.
3551

3552
    """
3553
    instance = self.instance
3554
    node_current = instance.primary_node
3555
    self.cfg.MarkInstanceDown(instance.name)
3556
    result = self.rpc.call_instance_shutdown(node_current, instance)
3557
    msg = result.fail_msg
3558
    if msg:
3559
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3560

    
3561
    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
3565
  """Reinstall an instance.
3566

3567
  """
3568
  HPATH = "instance-reinstall"
3569
  HTYPE = constants.HTYPE_INSTANCE
3570
  _OP_REQP = ["instance_name"]
3571
  REQ_BGL = False
3572

    
3573
  def ExpandNames(self):
3574
    self._ExpandAndLockInstance()
3575

    
3576
  def BuildHooksEnv(self):
3577
    """Build hooks env.
3578

3579
    This runs on master, primary and secondary nodes of the instance.
3580

3581
    """
3582
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3583
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3584
    return env, nl, nl
3585

    
3586
  def CheckPrereq(self):
3587
    """Check prerequisites.
3588

3589
    This checks that the instance is in the cluster and is not running.
3590

3591
    """
3592
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3593
    assert instance is not None, \
3594
      "Cannot retrieve locked instance %s" % self.op.instance_name
3595
    _CheckNodeOnline(self, instance.primary_node)
3596

    
3597
    if instance.disk_template == constants.DT_DISKLESS:
3598
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3599
                                 self.op.instance_name)
3600
    if instance.admin_up:
3601
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3602
                                 self.op.instance_name)
3603
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3604
                                              instance.name,
3605
                                              instance.hypervisor)
3606
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3607
                      prereq=True)
3608
    if remote_info.payload:
3609
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3610
                                 (self.op.instance_name,
3611
                                  instance.primary_node))
3612

    
3613
    self.op.os_type = getattr(self.op, "os_type", None)
3614
    if self.op.os_type is not None:
3615
      # OS verification
3616
      pnode = self.cfg.GetNodeInfo(
3617
        self.cfg.ExpandNodeName(instance.primary_node))
3618
      if pnode is None:
3619
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3620
                                   self.op.pnode)
3621
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3622
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3623
                   (self.op.os_type, pnode.name), prereq=True)
3624

    
3625
    self.instance = instance
3626

    
3627
  def Exec(self, feedback_fn):
3628
    """Reinstall the instance.
3629

3630
    """
3631
    inst = self.instance
3632

    
3633
    if self.op.os_type is not None:
3634
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3635
      inst.os = self.op.os_type
3636
      self.cfg.Update(inst)
3637

    
3638
    _StartInstanceDisks(self, inst, None)
3639
    try:
3640
      feedback_fn("Running the instance OS create scripts...")
3641
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3642
      result.Raise("Could not install OS for instance %s on node %s" %
3643
                   (inst.name, inst.primary_node))
3644
    finally:
3645
      _ShutdownInstanceDisks(self, inst)
3646

    
3647

    
3648
class LURecreateInstanceDisks(LogicalUnit):
3649
  """Recreate an instance's missing disks.
3650

3651
  """
3652
  HPATH = "instance-recreate-disks"
3653
  HTYPE = constants.HTYPE_INSTANCE
3654
  _OP_REQP = ["instance_name", "disks"]
3655
  REQ_BGL = False
3656

    
3657
  def CheckArguments(self):
3658
    """Check the arguments.
3659

3660
    """
3661
    if not isinstance(self.op.disks, list):
3662
      raise errors.OpPrereqError("Invalid disks parameter")
3663
    for item in self.op.disks:
3664
      if (not isinstance(item, int) or
3665
          item < 0):
3666
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3667
                                   str(item))
3668

    
3669
  def ExpandNames(self):
3670
    self._ExpandAndLockInstance()
3671

    
3672
  def BuildHooksEnv(self):
3673
    """Build hooks env.
3674

3675
    This runs on master, primary and secondary nodes of the instance.
3676

3677
    """
3678
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3679
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3680
    return env, nl, nl
3681

    
3682
  def CheckPrereq(self):
3683
    """Check prerequisites.
3684

3685
    This checks that the instance is in the cluster and is not running.
3686

3687
    """
3688
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3689
    assert instance is not None, \
3690
      "Cannot retrieve locked instance %s" % self.op.instance_name
3691
    _CheckNodeOnline(self, instance.primary_node)
3692

    
3693
    if instance.disk_template == constants.DT_DISKLESS:
3694
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3695
                                 self.op.instance_name)
3696
    if instance.admin_up:
3697
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3698
                                 self.op.instance_name)
3699
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3700
                                              instance.name,
3701
                                              instance.hypervisor)
3702
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3703
                      prereq=True)
3704
    if remote_info.payload:
3705
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3706
                                 (self.op.instance_name,
3707
                                  instance.primary_node))
3708

    
3709
    if not self.op.disks:
3710
      self.op.disks = range(len(instance.disks))
3711
    else:
3712
      for idx in self.op.disks:
3713
        if idx >= len(instance.disks):
3714
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
3715

    
3716
    self.instance = instance
3717

    
3718
  def Exec(self, feedback_fn):
3719
    """Recreate the disks.
3720

3721
    """
3722
    to_skip = []
3723
    for idx, _ in enumerate(self.instance.disks):
      if idx not in self.op.disks: # disk idx has not been passed in
        to_skip.append(idx)
3727

    
3728
    _CreateDisks(self, self.instance, to_skip=to_skip)
3729

    
3730

    
3731
class LURenameInstance(LogicalUnit):
3732
  """Rename an instance.
3733

3734
  """
3735
  HPATH = "instance-rename"
3736
  HTYPE = constants.HTYPE_INSTANCE
3737
  _OP_REQP = ["instance_name", "new_name"]
3738

    
3739
  def BuildHooksEnv(self):
3740
    """Build hooks env.
3741

3742
    This runs on master, primary and secondary nodes of the instance.
3743

3744
    """
3745
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3746
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3747
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3748
    return env, nl, nl
3749

    
3750
  def CheckPrereq(self):
3751
    """Check prerequisites.
3752

3753
    This checks that the instance is in the cluster and is not running.
3754

3755
    """
3756
    instance = self.cfg.GetInstanceInfo(
3757
      self.cfg.ExpandInstanceName(self.op.instance_name))
3758
    if instance is None:
3759
      raise errors.OpPrereqError("Instance '%s' not known" %
3760
                                 self.op.instance_name)
3761
    _CheckNodeOnline(self, instance.primary_node)
3762

    
3763
    if instance.admin_up:
3764
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3765
                                 self.op.instance_name)
3766
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3767
                                              instance.name,
3768
                                              instance.hypervisor)
3769
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3770
                      prereq=True)
3771
    if remote_info.payload:
3772
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3773
                                 (self.op.instance_name,
3774
                                  instance.primary_node))
3775
    self.instance = instance
3776

    
3777
    # new name verification
3778
    name_info = utils.HostInfo(self.op.new_name)
3779

    
3780
    self.op.new_name = new_name = name_info.name
3781
    instance_list = self.cfg.GetInstanceList()
3782
    if new_name in instance_list:
3783
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3784
                                 new_name)
3785

    
3786
    if not getattr(self.op, "ignore_ip", False):
3787
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3788
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3789
                                   (name_info.ip, new_name))
3790

    
3791

    
3792
  def Exec(self, feedback_fn):
3793
    """Reinstall the instance.
3794

3795
    """
3796
    inst = self.instance
3797
    old_name = inst.name
3798

    
3799
    if inst.disk_template == constants.DT_FILE:
3800
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3801

    
3802
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3803
    # Change the instance lock. This is definitely safe while we hold the BGL
3804
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3805
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3806

    
3807
    # re-read the instance from the configuration after rename
3808
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3809

    
3810
    if inst.disk_template == constants.DT_FILE:
3811
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3812
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3813
                                                     old_file_storage_dir,
3814
                                                     new_file_storage_dir)
3815
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3816
                   " (but the instance has been renamed in Ganeti)" %
3817
                   (inst.primary_node, old_file_storage_dir,
3818
                    new_file_storage_dir))
3819

    
3820
    _StartInstanceDisks(self, inst, None)
3821
    try:
3822
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3823
                                                 old_name)
3824
      msg = result.fail_msg
3825
      if msg:
3826
        msg = ("Could not run OS rename script for instance %s on node %s"
3827
               " (but the instance has been renamed in Ganeti): %s" %
3828
               (inst.name, inst.primary_node, msg))
3829
        self.proc.LogWarning(msg)
3830
    finally:
3831
      _ShutdownInstanceDisks(self, inst)
3832

    
3833

    
3834
class LURemoveInstance(LogicalUnit):
3835
  """Remove an instance.
3836

3837
  """
3838
  HPATH = "instance-remove"
3839
  HTYPE = constants.HTYPE_INSTANCE
3840
  _OP_REQP = ["instance_name", "ignore_failures"]
3841
  REQ_BGL = False
3842

    
3843
  def ExpandNames(self):
3844
    self._ExpandAndLockInstance()
3845
    self.needed_locks[locking.LEVEL_NODE] = []
3846
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3847

    
3848
  def DeclareLocks(self, level):
3849
    if level == locking.LEVEL_NODE:
3850
      self._LockInstancesNodes()
3851

    
3852
  def BuildHooksEnv(self):
3853
    """Build hooks env.
3854

3855
    This runs on master, primary and secondary nodes of the instance.
3856

3857
    """
3858
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3859
    nl = [self.cfg.GetMasterNode()]
3860
    return env, nl, nl
3861

    
3862
  def CheckPrereq(self):
3863
    """Check prerequisites.
3864

3865
    This checks that the instance is in the cluster.
3866

3867
    """
3868
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3869
    assert self.instance is not None, \
3870
      "Cannot retrieve locked instance %s" % self.op.instance_name
3871

    
3872
  def Exec(self, feedback_fn):
3873
    """Remove the instance.
3874

3875
    """
3876
    instance = self.instance
3877
    logging.info("Shutting down instance %s on node %s",
3878
                 instance.name, instance.primary_node)
3879

    
3880
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3881
    msg = result.fail_msg
3882
    if msg:
3883
      if self.op.ignore_failures:
3884
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3885
      else:
3886
        raise errors.OpExecError("Could not shutdown instance %s on"
3887
                                 " node %s: %s" %
3888
                                 (instance.name, instance.primary_node, msg))
3889

    
3890
    logging.info("Removing block devices for instance %s", instance.name)
3891

    
3892
    if not _RemoveDisks(self, instance):
3893
      if self.op.ignore_failures:
3894
        feedback_fn("Warning: can't remove instance's disks")
3895
      else:
3896
        raise errors.OpExecError("Can't remove instance's disks")
3897

    
3898
    logging.info("Removing instance %s out of cluster config", instance.name)
3899

    
3900
    self.cfg.RemoveInstance(instance.name)
3901
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3902

    
3903

    
3904
class LUQueryInstances(NoHooksLU):
3905
  """Logical unit for querying instances.
3906

3907
  """
3908
  _OP_REQP = ["output_fields", "names", "use_locking"]
3909
  REQ_BGL = False
3910
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3911
                                    "admin_state",
3912
                                    "disk_template", "ip", "mac", "bridge",
3913
                                    "nic_mode", "nic_link",
3914
                                    "sda_size", "sdb_size", "vcpus", "tags",
3915
                                    "network_port", "beparams",
3916
                                    r"(disk)\.(size)/([0-9]+)",
3917
                                    r"(disk)\.(sizes)", "disk_usage",
3918
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3919
                                    r"(nic)\.(bridge)/([0-9]+)",
3920
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
3921
                                    r"(disk|nic)\.(count)",
3922
                                    "serial_no", "hypervisor", "hvparams",] +
3923
                                  ["hv/%s" % name
3924
                                   for name in constants.HVS_PARAMETERS] +
3925
                                  ["be/%s" % name
3926
                                   for name in constants.BES_PARAMETERS])
3927
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3928

    
3929

    
3930
  def ExpandNames(self):
3931
    _CheckOutputFields(static=self._FIELDS_STATIC,
3932
                       dynamic=self._FIELDS_DYNAMIC,
3933
                       selected=self.op.output_fields)
3934

    
3935
    self.needed_locks = {}
3936
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3937
    self.share_locks[locking.LEVEL_NODE] = 1
3938

    
3939
    if self.op.names:
3940
      self.wanted = _GetWantedInstances(self, self.op.names)
3941
    else:
3942
      self.wanted = locking.ALL_SET
3943

    
3944
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3945
    self.do_locking = self.do_node_query and self.op.use_locking
3946
    if self.do_locking:
3947
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3948
      self.needed_locks[locking.LEVEL_NODE] = []
3949
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3950

    
3951
  def DeclareLocks(self, level):
3952
    if level == locking.LEVEL_NODE and self.do_locking:
3953
      self._LockInstancesNodes()
3954

    
3955
  def CheckPrereq(self):
3956
    """Check prerequisites.
3957

3958
    """
3959
    pass
3960

    
3961
  def Exec(self, feedback_fn):
3962
    """Computes the list of nodes and their attributes.
3963

3964
    """
3965
    all_info = self.cfg.GetAllInstancesInfo()
3966
    if self.wanted == locking.ALL_SET:
3967
      # caller didn't specify instance names, so ordering is not important
3968
      if self.do_locking:
3969
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3970
      else:
3971
        instance_names = all_info.keys()
3972
      instance_names = utils.NiceSort(instance_names)
3973
    else:
3974
      # caller did specify names, so we must keep the ordering
3975
      if self.do_locking:
3976
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3977
      else:
3978
        tgt_set = all_info.keys()
3979
      missing = set(self.wanted).difference(tgt_set)
3980
      if missing:
3981
        raise errors.OpExecError("Some instances were removed before"
3982
                                 " retrieving their data: %s" % missing)
3983
      instance_names = self.wanted
3984

    
3985
    instance_list = [all_info[iname] for iname in instance_names]
3986

    
3987
    # begin data gathering
3988

    
3989
    nodes = frozenset([inst.primary_node for inst in instance_list])
3990
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3991

    
3992
    bad_nodes = []
3993
    off_nodes = []
3994
    if self.do_node_query:
3995
      live_data = {}
3996
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
3997
      for name in nodes:
3998
        result = node_data[name]
3999
        if result.offline:
4000
          # offline nodes will be in both lists
4001
          off_nodes.append(name)
4002
        if result.failed or result.fail_msg:
4003
          bad_nodes.append(name)
4004
        else:
4005
          if result.payload:
4006
            live_data.update(result.payload)
4007
          # else no instance is alive
4008
    else:
4009
      live_data = dict([(name, {}) for name in instance_names])
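    # live_data maps instance name to the runtime info reported by its
    # primary node; when no node query was needed it is just an empty dict
    # per instance, so the oper_* and status fields degrade gracefully.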
4010

    
4011
    # end data gathering
4012

    
4013
    HVPREFIX = "hv/"
4014
    BEPREFIX = "be/"
4015
    output = []
4016
    cluster = self.cfg.GetClusterInfo()
4017
    for instance in instance_list:
4018
      iout = []
4019
      i_hv = cluster.FillHV(instance)
4020
      i_be = cluster.FillBE(instance)
4021
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4022
                                 nic.nicparams) for nic in instance.nics]
4023
      for field in self.op.output_fields:
4024
        st_match = self._FIELDS_STATIC.Matches(field)
4025
        if field == "name":
4026
          val = instance.name
4027
        elif field == "os":
4028
          val = instance.os
4029
        elif field == "pnode":
4030
          val = instance.primary_node
4031
        elif field == "snodes":
4032
          val = list(instance.secondary_nodes)
4033
        elif field == "admin_state":
4034
          val = instance.admin_up
4035
        elif field == "oper_state":
4036
          if instance.primary_node in bad_nodes:
4037
            val = None
4038
          else:
4039
            val = bool(live_data.get(instance.name))
4040
        elif field == "status":
4041
          if instance.primary_node in off_nodes:
4042
            val = "ERROR_nodeoffline"
4043
          elif instance.primary_node in bad_nodes:
4044
            val = "ERROR_nodedown"
4045
          else:
4046
            running = bool(live_data.get(instance.name))
4047
            if running:
4048
              if instance.admin_up:
4049
                val = "running"
4050
              else:
4051
                val = "ERROR_up"
4052
            else:
4053
              if instance.admin_up:
4054
                val = "ERROR_down"
4055
              else:
4056
                val = "ADMIN_down"
4057
        elif field == "oper_ram":
4058
          if instance.primary_node in bad_nodes:
4059
            val = None
4060
          elif instance.name in live_data:
4061
            val = live_data[instance.name].get("memory", "?")
4062
          else:
4063
            val = "-"
4064
        elif field == "vcpus":
4065
          val = i_be[constants.BE_VCPUS]
4066
        elif field == "disk_template":
4067
          val = instance.disk_template
4068
        elif field == "ip":
4069
          if instance.nics:
4070
            val = instance.nics[0].ip
4071
          else:
4072
            val = None
4073
        elif field == "nic_mode":
4074
          if instance.nics:
4075
            val = i_nicp[0][constants.NIC_MODE]
4076
          else:
4077
            val = None
4078
        elif field == "nic_link":
4079
          if instance.nics:
4080
            val = i_nicp[0][constants.NIC_LINK]
4081
          else:
4082
            val = None
4083
        elif field == "bridge":
4084
          if (instance.nics and
4085
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4086
            val = i_nicp[0][constants.NIC_LINK]
4087
          else:
4088
            val = None
4089
        elif field == "mac":
4090
          if instance.nics:
4091
            val = instance.nics[0].mac
4092
          else:
4093
            val = None
4094
        elif field == "sda_size" or field == "sdb_size":
4095
          idx = ord(field[2]) - ord('a')
4096
          try:
4097
            val = instance.FindDisk(idx).size
4098
          except errors.OpPrereqError:
4099
            val = None
4100
        elif field == "disk_usage": # total disk usage per node
4101
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4102
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4103
        elif field == "tags":
4104
          val = list(instance.GetTags())
4105
        elif field == "serial_no":
4106
          val = instance.serial_no
4107
        elif field == "network_port":
4108
          val = instance.network_port
4109
        elif field == "hypervisor":
4110
          val = instance.hypervisor
4111
        elif field == "hvparams":
4112
          val = i_hv
4113
        elif (field.startswith(HVPREFIX) and
4114
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4115
          val = i_hv.get(field[len(HVPREFIX):], None)
4116
        elif field == "beparams":
4117
          val = i_be
4118
        elif (field.startswith(BEPREFIX) and
4119
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4120
          val = i_be.get(field[len(BEPREFIX):], None)
4121
        elif st_match and st_match.groups():
4122
          # matches a variable list
4123
          st_groups = st_match.groups()
4124
          if st_groups and st_groups[0] == "disk":
4125
            if st_groups[1] == "count":
4126
              val = len(instance.disks)
4127
            elif st_groups[1] == "sizes":
4128
              val = [disk.size for disk in instance.disks]
4129
            elif st_groups[1] == "size":
4130
              try:
4131
                val = instance.FindDisk(st_groups[2]).size
4132
              except errors.OpPrereqError:
4133
                val = None
4134
            else:
4135
              assert False, "Unhandled disk parameter"
4136
          elif st_groups[0] == "nic":
4137
            if st_groups[1] == "count":
4138
              val = len(instance.nics)
4139
            elif st_groups[1] == "macs":
4140
              val = [nic.mac for nic in instance.nics]
4141
            elif st_groups[1] == "ips":
4142
              val = [nic.ip for nic in instance.nics]
4143
            elif st_groups[1] == "modes":
4144
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4145
            elif st_groups[1] == "links":
4146
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4147
            elif st_groups[1] == "bridges":
4148
              val = []
4149
              for nicp in i_nicp:
4150
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4151
                  val.append(nicp[constants.NIC_LINK])
4152
                else:
4153
                  val.append(None)
4154
            else:
4155
              # index-based item
4156
              nic_idx = int(st_groups[2])
4157
              if nic_idx >= len(instance.nics):
4158
                val = None
4159
              else:
4160
                if st_groups[1] == "mac":
4161
                  val = instance.nics[nic_idx].mac
4162
                elif st_groups[1] == "ip":
4163
                  val = instance.nics[nic_idx].ip
4164
                elif st_groups[1] == "mode":
4165
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4166
                elif st_groups[1] == "link":
4167
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4168
                elif st_groups[1] == "bridge":
4169
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4170
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4171
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4172
                  else:
4173
                    val = None
4174
                else:
4175
                  assert False, "Unhandled NIC parameter"
4176
          else:
4177
            assert False, ("Declared but unhandled variable parameter '%s'" %
4178
                           field)
4179
        else:
4180
          assert False, "Declared but unhandled parameter '%s'" % field
4181
        iout.append(val)
4182
      output.append(iout)
4183

    
4184
    return output


class LUFailoverInstance(LogicalUnit):
4188
  """Failover an instance.
4189

4190
  """
4191
  HPATH = "instance-failover"
4192
  HTYPE = constants.HTYPE_INSTANCE
4193
  _OP_REQP = ["instance_name", "ignore_consistency"]
4194
  REQ_BGL = False
4195

    
4196
  def ExpandNames(self):
4197
    self._ExpandAndLockInstance()
4198
    self.needed_locks[locking.LEVEL_NODE] = []
4199
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4200

    
4201
  def DeclareLocks(self, level):
4202
    if level == locking.LEVEL_NODE:
4203
      self._LockInstancesNodes()
4204

    
4205
  def BuildHooksEnv(self):
4206
    """Build hooks env.
4207

4208
    This runs on master, primary and secondary nodes of the instance.
4209

4210
    """
4211
    env = {
4212
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4213
      }
4214
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4215
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4216
    return env, nl, nl
4217

    
4218
  def CheckPrereq(self):
4219
    """Check prerequisites.
4220

4221
    This checks that the instance is in the cluster.
4222

4223
    """
4224
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4225
    assert self.instance is not None, \
4226
      "Cannot retrieve locked instance %s" % self.op.instance_name
4227

    
4228
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4229
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4230
      raise errors.OpPrereqError("Instance's disk layout is not"
4231
                                 " network mirrored, cannot failover.")
4232

    
4233
    secondary_nodes = instance.secondary_nodes
4234
    if not secondary_nodes:
4235
      raise errors.ProgrammerError("no secondary node but using "
4236
                                   "a mirrored disk template")
4237

    
4238
    target_node = secondary_nodes[0]
4239
    _CheckNodeOnline(self, target_node)
4240
    _CheckNodeNotDrained(self, target_node)
4241
    if instance.admin_up:
4242
      # check memory requirements on the secondary node
4243
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4244
                           instance.name, bep[constants.BE_MEMORY],
4245
                           instance.hypervisor)
4246
    else:
4247
      self.LogInfo("Not checking memory on the secondary node as"
4248
                   " instance will not be started")
4249

    
4250
    # check bridge existence
4251
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4252

    
4253
  def Exec(self, feedback_fn):
4254
    """Failover an instance.
4255

4256
    The failover is done by shutting it down on its present node and
4257
    starting it on the secondary.
4258

4259
    """
4260
    instance = self.instance
4261

    
4262
    source_node = instance.primary_node
4263
    target_node = instance.secondary_nodes[0]
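    # Rough sequence from here on: verify disk consistency, shut the
    # instance down on the source node, deactivate its disks, flip
    # primary_node in the configuration and, only if the instance is
    # marked up, re-activate the disks and start it on the target node.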
4264

    
4265
    feedback_fn("* checking disk consistency between source and target")
4266
    for dev in instance.disks:
4267
      # for drbd, these are drbd over lvm
4268
      if not _CheckDiskConsistency(self, dev, target_node, False):
4269
        if instance.admin_up and not self.op.ignore_consistency:
4270
          raise errors.OpExecError("Disk %s is degraded on target node,"
4271
                                   " aborting failover." % dev.iv_name)
4272

    
4273
    feedback_fn("* shutting down instance on source node")
4274
    logging.info("Shutting down instance %s on node %s",
4275
                 instance.name, source_node)
4276

    
4277
    result = self.rpc.call_instance_shutdown(source_node, instance)
4278
    msg = result.fail_msg
4279
    if msg:
4280
      if self.op.ignore_consistency:
4281
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4282
                             " Proceeding anyway. Please make sure node"
4283
                             " %s is down. Error details: %s",
4284
                             instance.name, source_node, source_node, msg)
4285
      else:
4286
        raise errors.OpExecError("Could not shutdown instance %s on"
4287
                                 " node %s: %s" %
4288
                                 (instance.name, source_node, msg))
4289

    
4290
    feedback_fn("* deactivating the instance's disks on source node")
4291
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4292
      raise errors.OpExecError("Can't shut down the instance's disks.")
4293

    
4294
    instance.primary_node = target_node
4295
    # distribute new instance config to the other nodes
4296
    self.cfg.Update(instance)
4297

    
4298
    # Only start the instance if it's marked as up
4299
    if instance.admin_up:
4300
      feedback_fn("* activating the instance's disks on target node")
4301
      logging.info("Starting instance %s on node %s",
4302
                   instance.name, target_node)
4303

    
4304
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4305
                                               ignore_secondaries=True)
4306
      if not disks_ok:
4307
        _ShutdownInstanceDisks(self, instance)
4308
        raise errors.OpExecError("Can't activate the instance's disks")
4309

    
4310
      feedback_fn("* starting the instance on the target node")
4311
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4312
      msg = result.fail_msg
4313
      if msg:
4314
        _ShutdownInstanceDisks(self, instance)
4315
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4316
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
4320
  """Migrate an instance.
4321

4322
  This is migration without shutting down, compared to the failover,
4323
  which is done with shutdown.
4324

4325
  """
4326
  HPATH = "instance-migrate"
4327
  HTYPE = constants.HTYPE_INSTANCE
4328
  _OP_REQP = ["instance_name", "live", "cleanup"]
4329

    
4330
  REQ_BGL = False
4331

    
4332
  def ExpandNames(self):
4333
    self._ExpandAndLockInstance()
4334

    
4335
    self.needed_locks[locking.LEVEL_NODE] = []
4336
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4337

    
4338
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4339
                                       self.op.live, self.op.cleanup)
4340
    self.tasklets = [self._migrater]
4341

    
4342
  def DeclareLocks(self, level):
4343
    if level == locking.LEVEL_NODE:
4344
      self._LockInstancesNodes()
4345

    
4346
  def BuildHooksEnv(self):
4347
    """Build hooks env.
4348

4349
    This runs on master, primary and secondary nodes of the instance.
4350

4351
    """
4352
    instance = self._migrater.instance
4353
    env = _BuildInstanceHookEnvByObject(self, instance)
4354
    env["MIGRATE_LIVE"] = self.op.live
4355
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4356
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4357
    return env, nl, nl
4358

    
4359

    
4360
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create one migration tasklet per instance having this node as primary
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks on node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

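  # The three helpers above (_EnsureSecondary, _GoStandalone, _GoReconnect)
  # are combined by the cleanup and migration paths into a small state
  # machine on the DRBD devices: demote where needed, drop the network
  # connection, then reattach in either single-master or dual-master mode
  # and wait for resync.  Illustrative sequence only, mirroring the calls
  # made in _ExecMigration below:
  #
  #   self._EnsureSecondary(target_node)   # target's disks must be secondary
  #   self._GoStandalone()                 # disconnect both sides
  #   self._GoReconnect(True)              # reattach in dual-master mode
  #   self._WaitUntilSync()                # block until resync finishes
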
  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

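  # Cleanup decision summary (derived from the checks in _ExecCleanup):
  #
  #   running on source | running on target | action
  #   ------------------+-------------------+--------------------------------
  #   yes               | yes               | abort, manual repair needed
  #   no                | no                | abort, instance must be stopped
  #                     |                   | and restarted by hand
  #   no                | yes               | migration succeeded: point the
  #                     |                   | config at the target, demote
  #                     |                   | the old primary
  #   yes               | no                | migration failed: keep the
  #                     |                   | config, demote the target
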
  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


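# NOTE (illustrative): TLMigrateInstance.Exec builds self.nodes_ip from the
# nodes' secondary IPs, so the DRBD network reconfiguration above happens on
# the replication network.  The mapping is a plain dict, e.g. (hypothetical
# node names and addresses):
#
#   nodes_ip = {"node1.example.com": "192.168.1.1",
#               "node2.example.com": "192.168.1.2"}

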
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has the
      CreateOnSecondary() attribute set
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


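# NOTE (illustrative): _CreateBlockDev recurses into the children before
# creating the device itself, so for DRBD the two backing LVs exist before
# the DRBD device is set up on top of them; force_create is switched on as
# soon as a device in the tree reports CreateOnSecondary().

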
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as an LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


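# NOTE (illustrative): _GenerateUniqueNames simply prefixes each extension
# with a cluster-wide unique ID, so a call such as
#   _GenerateUniqueNames(lu, [".disk0"])
# returns something of the shape (the unique-ID format is an assumption)
#   ["<unique-id>.disk0"]

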
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


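# NOTE (illustrative): the disk object returned by _GenerateDRBD8Branch is a
# small tree: a DRBD8 device whose children are the data LV (full size) and
# a fixed 128 MB metadata LV, conceptually:
#
#   LD_DRBD8 (primary, secondary, port, p_minor, s_minor, secret)
#   |-- LD_LV  <name>_data  (size MB)
#   `-- LD_LV  <name>_meta  (128 MB)

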
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks


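# NOTE (illustrative): disk_info is the list of simple disk dicts built by
# LUCreateInstance.ExpandNames, e.g. [{"size": 1024, "mode": "rw"}] (values
# here are made up).  Depending on template_name this becomes plain LVs,
# DRBD8 trees (via _GenerateDRBD8Branch) or file-backed disks, all named
# "disk/<index>" starting at base_index.

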
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  pnode = instance.primary_node

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in instance.all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


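# NOTE (illustrative): in _CreateDisks above, f_create is used both as
# force_create and force_open and is True only on the primary node; on the
# other nodes of the instance, devices are only created where the disk type
# itself reports CreateOnSecondary() (see _CreateBlockDev).

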
def _RemoveDisks(lu, instance):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    for node, disk in device.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_remove(instance.primary_node,
                                                 file_storage_dir)
    msg = result.fail_msg
    if msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of the disk template and disk sizes
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


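# NOTE (illustrative): a worked example for _ComputeDiskSize with two disks
# of 1024 MB and 512 MB: DT_PLAIN needs 1536 MB of free VG space, DT_DRBD8
# needs 1536 + 2 * 128 = 1792 MB (metadata per disk), and DT_DISKLESS /
# DT_FILE do not consume volume group space (None).

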
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to None if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

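  # NOTE (illustrative): the opcode's nics and disks are plain lists of
  # dicts as parsed above, e.g. (values are made up):
  #   op.nics  = [{"mode": "bridged", "link": "xen-br0", "mac": "auto"}]
  #   op.disks = [{"size": 10240, "mode": "rw"}]
  # ExpandNames normalizes them into self.nics (objects.NIC instances) and
  # self.disks (dicts with integer "size" and a validated "mode").
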
  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

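  # NOTE (illustrative): on success the allocator returns an ordered node
  # list; the first entry becomes the primary node and, when two nodes are
  # required (mirrored templates such as drbd8), the second becomes the
  # secondary.  A hypothetical result would be
  # ial.nodes == ["node2.example.com", "node5.example.com"].
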
  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have fewer disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    ##if self.op.vnc_bind_address is None:
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS

    # this is needed because os.path.join does not accept None arguments
    if self.op.file_storage_dir is None:
      string_file_storage_dir = ""
    else:
      string_file_storage_dir = self.op.file_storage_dir

    # build the full file storage dir path
    file_storage_dir = os.path.normpath(os.path.join(
                                        self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance))

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            )

    feedback_fn("* creating instance disks...")
    try:
      _CreateDisks(self, iobj)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, iobj)
      finally:
        self.cfg.ReleaseDRBDMinors(instance)
        raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj)
    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    feedback_fn("creating os for instance %s on node %s" %
                (instance, pnode_name))

    if iobj.disk_template != constants.DT_DISKLESS:
      if self.op.mode == constants.INSTANCE_CREATE:
        feedback_fn("* running the instance OS create scripts...")
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
        result.Raise("Could not add os for instance %s"
                     " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")
        src_node = self.op.src_node
        src_images = self.src_images
        cluster_name = self.cfg.GetClusterName()
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
                                                         src_node, src_images,
                                                         cluster_name)
        msg = import_result.fail_msg
        if msg:
          self.LogWarning("Error while importing the disk images for instance"
                          " %s on node %s: %s" % (instance, pnode_name, msg))
      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      iobj.admin_up = True
      self.cfg.Update(iobj)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)


class LUConnectConsole(NoHooksLU):
5632
  """Connect to an instance's console.
5633

5634
  This is somewhat special in that it returns the command line that
5635
  you need to run on the master node in order to connect to the
5636
  console.
5637

5638
  """
5639
  _OP_REQP = ["instance_name"]
5640
  REQ_BGL = False
5641

    
5642
  def ExpandNames(self):
5643
    self._ExpandAndLockInstance()
5644

    
5645
  def CheckPrereq(self):
5646
    """Check prerequisites.
5647

5648
    This checks that the instance is in the cluster.
5649

5650
    """
5651
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5652
    assert self.instance is not None, \
5653
      "Cannot retrieve locked instance %s" % self.op.instance_name
5654
    _CheckNodeOnline(self, self.instance.primary_node)
5655

    
5656
  def Exec(self, feedback_fn):
5657
    """Connect to the console of an instance
5658

5659
    """
5660
    instance = self.instance
5661
    node = instance.primary_node
5662

    
5663
    node_insts = self.rpc.call_instance_list([node],
5664
                                             [instance.hypervisor])[node]
5665
    node_insts.Raise("Can't get node information from %s" % node)
5666

    
5667
    if instance.name not in node_insts.payload:
5668
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5669

    
5670
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5671

    
5672
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5673
    cluster = self.cfg.GetClusterInfo()
5674
    # beparams and hvparams are passed separately, to avoid editing the
5675
    # instance and then saving the defaults in the instance itself.
5676
    hvparams = cluster.FillHV(instance)
5677
    beparams = cluster.FillBE(instance)
5678
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5679

    
5680
    # build ssh cmdline
5681
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
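    # Example (schematic, hypothetical names): the returned value is the
    # full SSH command the caller runs on the master node, conceptually
    # something like
    #   ssh -q -t root@node2.example.com '<console command>'
    # where <console command> comes from the hypervisor (e.g. "xm console
    # instance1.example.com" for Xen), plus the batch/tty options added by
    # SshRunner.BuildCmd.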
5682

    
5683

    
5684
class LUReplaceDisks(LogicalUnit):
5685
  """Replace the disks of an instance.
5686

5687
  """
5688
  HPATH = "mirrors-replace"
5689
  HTYPE = constants.HTYPE_INSTANCE
5690
  _OP_REQP = ["instance_name", "mode", "disks"]
5691
  REQ_BGL = False
5692

    
5693
  def CheckArguments(self):
5694
    if not hasattr(self.op, "remote_node"):
5695
      self.op.remote_node = None
5696
    if not hasattr(self.op, "iallocator"):
5697
      self.op.iallocator = None
5698

    
5699
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
5700
                                  self.op.iallocator)
5701

    
5702
  def ExpandNames(self):
5703
    self._ExpandAndLockInstance()
5704

    
5705
    if self.op.iallocator is not None:
5706
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5707

    
5708
    elif self.op.remote_node is not None:
5709
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5710
      if remote_node is None:
5711
        raise errors.OpPrereqError("Node '%s' not known" %
5712
                                   self.op.remote_node)
5713

    
5714
      self.op.remote_node = remote_node
5715

    
5716
      # Warning: do not remove the locking of the new secondary here
5717
      # unless DRBD8.AddChildren is changed to work in parallel;
5718
      # currently it doesn't since parallel invocations of
5719
      # FindUnusedMinor will conflict
5720
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5721
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5722

    
5723
    else:
5724
      self.needed_locks[locking.LEVEL_NODE] = []
5725
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5726

    
5727
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
5728
                                   self.op.iallocator, self.op.remote_node,
5729
                                   self.op.disks)
5730

    
5731
    self.tasklets = [self.replacer]
5732

    
5733
  def DeclareLocks(self, level):
5734
    # If we're not already locking all nodes in the set we have to declare the
5735
    # instance's primary/secondary nodes.
5736
    if (level == locking.LEVEL_NODE and
5737
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5738
      self._LockInstancesNodes()
5739

    
5740
  def BuildHooksEnv(self):
5741
    """Build hooks env.
5742

5743
    This runs on the master, the primary and all the secondaries.
5744

5745
    """
5746
    instance = self.replacer.instance
5747
    env = {
5748
      "MODE": self.op.mode,
5749
      "NEW_SECONDARY": self.op.remote_node,
5750
      "OLD_SECONDARY": instance.secondary_nodes[0],
5751
      }
5752
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5753
    nl = [
5754
      self.cfg.GetMasterNode(),
5755
      instance.primary_node,
5756
      ]
5757
    if self.op.remote_node is not None:
5758
      nl.append(self.op.remote_node)
5759
    return env, nl, nl
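    # Example (schematic, hypothetical node names): when changing the
    # secondary, the environment built above contains something like
    #   {"MODE": self.op.mode,
    #    "NEW_SECONDARY": "node3.example.com",
    #    "OLD_SECONDARY": "node2.example.com",
    #    ...}    # plus the usual instance keys from
    #            # _BuildInstanceHookEnvByObject
    # and the hooks run on the master, the primary node and, if given, the
    # new secondary.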
5760

    
5761

    
5762
class LUEvacuateNode(LogicalUnit):
5763
  """Relocate the secondary instances from a node.
5764

5765
  """
5766
  HPATH = "node-evacuate"
5767
  HTYPE = constants.HTYPE_NODE
5768
  _OP_REQP = ["node_name"]
5769
  REQ_BGL = False
5770

    
5771
  def CheckArguments(self):
5772
    if not hasattr(self.op, "remote_node"):
5773
      self.op.remote_node = None
5774
    if not hasattr(self.op, "iallocator"):
5775
      self.op.iallocator = None
5776

    
5777
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
5778
                                  self.op.remote_node,
5779
                                  self.op.iallocator)
5780

    
5781
  def ExpandNames(self):
5782
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
5783
    if self.op.node_name is None:
5784
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
5785

    
5786
    self.needed_locks = {}
5787

    
5788
    # Declare node locks
5789
    if self.op.iallocator is not None:
5790
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5791

    
5792
    elif self.op.remote_node is not None:
5793
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5794
      if remote_node is None:
5795
        raise errors.OpPrereqError("Node '%s' not known" %
5796
                                   self.op.remote_node)
5797

    
5798
      self.op.remote_node = remote_node
5799

    
5800
      # Warning: do not remove the locking of the new secondary here
5801
      # unless DRBD8.AddChildren is changed to work in parallel;
5802
      # currently it doesn't since parallel invocations of
5803
      # FindUnusedMinor will conflict
5804
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5805
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5806

    
5807
    else:
5808
      raise errors.OpPrereqError("Invalid parameters")
5809

    
5810
    # Create tasklets for replacing disks for all secondary instances on this
5811
    # node
5812
    names = []
5813
    tasklets = []
5814

    
5815
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
5816
      logging.debug("Replacing disks for instance %s", inst.name)
5817
      names.append(inst.name)
5818

    
5819
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
5820
                                self.op.iallocator, self.op.remote_node, [])
5821
      tasklets.append(replacer)
5822

    
5823
    self.tasklets = tasklets
5824
    self.instance_names = names
5825

    
5826
    # Declare instance locks
5827
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
5828

    
5829
  def DeclareLocks(self, level):
5830
    # If we're not already locking all nodes in the set we have to declare the
5831
    # instance's primary/secondary nodes.
5832
    if (level == locking.LEVEL_NODE and
5833
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5834
      self._LockInstancesNodes()
5835

    
5836
  def BuildHooksEnv(self):
5837
    """Build hooks env.
5838

5839
    This runs on the master, the primary and all the secondaries.
5840

5841
    """
5842
    env = {
5843
      "NODE_NAME": self.op.node_name,
5844
      }
5845

    
5846
    nl = [self.cfg.GetMasterNode()]
5847

    
5848
    if self.op.remote_node is not None:
5849
      env["NEW_SECONDARY"] = self.op.remote_node
5850
      nl.append(self.op.remote_node)
5851

    
5852
    return (env, nl, nl)
5853

    
5854

    
5855
class TLReplaceDisks(Tasklet):
5856
  """Replaces disks for an instance.
5857

5858
  Note: Locking is not within the scope of this class.
5859

5860
  """
5861
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
5862
               disks):
5863
    """Initializes this class.
5864

5865
    """
5866
    Tasklet.__init__(self, lu)
5867

    
5868
    # Parameters
5869
    self.instance_name = instance_name
5870
    self.mode = mode
5871
    self.iallocator_name = iallocator_name
5872
    self.remote_node = remote_node
5873
    self.disks = disks
5874

    
5875
    # Runtime data
5876
    self.instance = None
5877
    self.new_node = None
5878
    self.target_node = None
5879
    self.other_node = None
5880
    self.remote_node_info = None
5881
    self.node_secondary_ip = None
5882

    
5883
  @staticmethod
5884
  def CheckArguments(mode, remote_node, iallocator):
5885
    """Helper function for users of this class.
5886

5887
    """
5888
    # check for valid parameter combination
5889
    if mode == constants.REPLACE_DISK_CHG:
5890
      if remote_node is None and iallocator is None:
5891
        raise errors.OpPrereqError("When changing the secondary either an"
5892
                                   " iallocator script must be used or the"
5893
                                   " new node given")
5894

    
5895
      if remote_node is not None and iallocator is not None:
5896
        raise errors.OpPrereqError("Give either the iallocator or the new"
5897
                                   " secondary, not both")
5898

    
5899
    elif remote_node is not None or iallocator is not None:
5900
      # Not replacing the secondary
5901
      raise errors.OpPrereqError("The iallocator and new node options can"
5902
                                 " only be used when changing the"
5903
                                 " secondary node")
5904

    
5905
  @staticmethod
5906
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
5907
    """Compute a new secondary node using an IAllocator.
5908

5909
    """
5910
    ial = IAllocator(lu.cfg, lu.rpc,
5911
                     mode=constants.IALLOCATOR_MODE_RELOC,
5912
                     name=instance_name,
5913
                     relocate_from=relocate_from)
5914

    
5915
    ial.Run(iallocator_name)
5916

    
5917
    if not ial.success:
5918
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
5919
                                 " %s" % (iallocator_name, ial.info))
5920

    
5921
    if len(ial.nodes) != ial.required_nodes:
5922
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5923
                                 " of nodes (%s), required %s" %
5924
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))
5925

    
5926
    remote_node_name = ial.nodes[0]
5927

    
5928
    lu.LogInfo("Selected new secondary for instance '%s': %s",
5929
               instance_name, remote_node_name)
5930

    
5931
    return remote_node_name
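    # Note: a relocation request asks the allocator for a single node, so a
    # successful run yields e.g. ial.nodes == ["node4.example.com"]
    # (hypothetical name); that one entry becomes the new secondary.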
5932

    
5933
  def _FindFaultyDisks(self, node_name):
5934
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
5935
                                    node_name, True)
5936

    
5937
  def CheckPrereq(self):
5938
    """Check prerequisites.
5939

5940
    This checks that the instance is in the cluster.
5941

5942
    """
5943
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
5944
    assert self.instance is not None, \
5945
      "Cannot retrieve locked instance %s" % self.instance_name
5946

    
5947
    if self.instance.disk_template != constants.DT_DRBD8:
5948
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5949
                                 " instances")
5950

    
5951
    if len(self.instance.secondary_nodes) != 1:
5952
      raise errors.OpPrereqError("The instance has a strange layout,"
5953
                                 " expected one secondary but found %d" %
5954
                                 len(self.instance.secondary_nodes))
5955

    
5956
    secondary_node = self.instance.secondary_nodes[0]
5957

    
5958
    if self.iallocator_name is None:
5959
      remote_node = self.remote_node
5960
    else:
5961
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
5962
                                       self.instance.name, secondary_node)
5963

    
5964
    if remote_node is not None:
5965
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5966
      assert self.remote_node_info is not None, \
5967
        "Cannot retrieve locked node %s" % remote_node
5968
    else:
5969
      self.remote_node_info = None
5970

    
5971
    if remote_node == self.instance.primary_node:
5972
      raise errors.OpPrereqError("The specified node is the primary node of"
5973
                                 " the instance.")
5974

    
5975
    if remote_node == secondary_node:
5976
      raise errors.OpPrereqError("The specified node is already the"
5977
                                 " secondary node of the instance.")
5978

    
5979
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
5980
                                    constants.REPLACE_DISK_CHG):
5981
      raise errors.OpPrereqError("Cannot specify disks to be replaced")
5982

    
5983
    if self.mode == constants.REPLACE_DISK_AUTO:
5984
      faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
5985
      faulty_secondary = self._FindFaultyDisks(secondary_node)
5986

    
5987
      if faulty_primary and faulty_secondary:
5988
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
5989
                                   " one node and can not be repaired"
5990
                                   " automatically" % self.instance_name)
5991

    
5992
      if faulty_primary:
5993
        self.disks = faulty_primary
5994
        self.target_node = self.instance.primary_node
5995
        self.other_node = secondary_node
5996
        check_nodes = [self.target_node, self.other_node]
5997
      elif faulty_secondary:
5998
        self.disks = faulty_secondary
5999
        self.target_node = secondary_node
6000
        self.other_node = self.instance.primary_node
6001
        check_nodes = [self.target_node, self.other_node]
6002
      else:
6003
        self.disks = []
6004
        check_nodes = []
6005

    
6006
    else:
6007
      # Non-automatic modes
6008
      if self.mode == constants.REPLACE_DISK_PRI:
6009
        self.target_node = self.instance.primary_node
6010
        self.other_node = secondary_node
6011
        check_nodes = [self.target_node, self.other_node]
6012

    
6013
      elif self.mode == constants.REPLACE_DISK_SEC:
6014
        self.target_node = secondary_node
6015
        self.other_node = self.instance.primary_node
6016
        check_nodes = [self.target_node, self.other_node]
6017

    
6018
      elif self.mode == constants.REPLACE_DISK_CHG:
6019
        self.new_node = remote_node
6020
        self.other_node = self.instance.primary_node
6021
        self.target_node = secondary_node
6022
        check_nodes = [self.new_node, self.other_node]
6023

    
6024
        _CheckNodeNotDrained(self.lu, remote_node)
6025

    
6026
      else:
6027
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6028
                                     self.mode)
6029

    
6030
      # If not specified all disks should be replaced
6031
      if not self.disks:
6032
        self.disks = range(len(self.instance.disks))
6033

    
6034
    for node in check_nodes:
6035
      _CheckNodeOnline(self.lu, node)
6036

    
6037
    # Check whether disks are valid
6038
    for disk_idx in self.disks:
6039
      self.instance.FindDisk(disk_idx)
6040

    
6041
    # Get secondary node IP addresses
6042
    node_2nd_ip = {}
6043

    
6044
    for node_name in [self.target_node, self.other_node, self.new_node]:
6045
      if node_name is not None:
6046
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6047

    
6048
    self.node_secondary_ip = node_2nd_ip
6049

    
6050
  def Exec(self, feedback_fn):
6051
    """Execute disk replacement.
6052

6053
    This dispatches the disk replacement to the appropriate handler.
6054

6055
    """
6056
    if not self.disks:
6057
      feedback_fn("No disks need replacement")
6058
      return
6059

    
6060
    feedback_fn("Replacing disk(s) %s for %s" %
6061
                (", ".join([str(i) for i in self.disks]), self.instance.name))
6062

    
6063
    activate_disks = (not self.instance.admin_up)
6064

    
6065
    # Activate the instance disks if we're replacing them on a down instance
6066
    if activate_disks:
6067
      _StartInstanceDisks(self.lu, self.instance, True)
6068

    
6069
    try:
6070
      # Should we replace the secondary node?
6071
      if self.new_node is not None:
6072
        return self._ExecDrbd8Secondary()
6073
      else:
6074
        return self._ExecDrbd8DiskOnly()
6075

    
6076
    finally:
6077
      # Deactivate the instance disks if we're replacing them on a down instance
6078
      if activate_disks:
6079
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6080

    
6081
  def _CheckVolumeGroup(self, nodes):
6082
    self.lu.LogInfo("Checking volume groups")
6083

    
6084
    vgname = self.cfg.GetVGName()
6085

    
6086
    # Make sure volume group exists on all involved nodes
6087
    results = self.rpc.call_vg_list(nodes)
6088
    if not results:
6089
      raise errors.OpExecError("Can't list volume groups on the nodes")
6090

    
6091
    for node in nodes:
6092
      res = results[node]
6093
      res.Raise("Error checking node %s" % node)
6094
      if vgname not in res.payload:
6095
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
6096
                                 (vgname, node))
6097

    
6098
  def _CheckDisksExistence(self, nodes):
6099
    # Check disk existence
6100
    for idx, dev in enumerate(self.instance.disks):
6101
      if idx not in self.disks:
6102
        continue
6103

    
6104
      for node in nodes:
6105
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6106
        self.cfg.SetDiskID(dev, node)
6107

    
6108
        result = self.rpc.call_blockdev_find(node, dev)
6109

    
6110
        msg = result.fail_msg
6111
        if msg or not result.payload:
6112
          if not msg:
6113
            msg = "disk not found"
6114
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6115
                                   (idx, node, msg))
6116

    
6117
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6118
    for idx, dev in enumerate(self.instance.disks):
6119
      if idx not in self.disks:
6120
        continue
6121

    
6122
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6123
                      (idx, node_name))
6124

    
6125
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6126
                                   ldisk=ldisk):
6127
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6128
                                 " replace disks for instance %s" %
6129
                                 (node_name, self.instance.name))
6130

    
6131
  def _CreateNewStorage(self, node_name):
6132
    vgname = self.cfg.GetVGName()
6133
    iv_names = {}
6134

    
6135
    for idx, dev in enumerate(self.instance.disks):
6136
      if idx not in self.disks:
6137
        continue
6138

    
6139
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6140

    
6141
      self.cfg.SetDiskID(dev, node_name)
6142

    
6143
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6144
      names = _GenerateUniqueNames(self.lu, lv_names)
6145

    
6146
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6147
                             logical_id=(vgname, names[0]))
6148
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6149
                             logical_id=(vgname, names[1]))
6150

    
6151
      new_lvs = [lv_data, lv_meta]
6152
      old_lvs = dev.children
6153
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
6154

    
6155
      # we pass force_create=True to force the LVM creation
6156
      for new_lv in new_lvs:
6157
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6158
                        _GetInstanceInfoText(self.instance), False)
6159

    
6160
    return iv_names
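    # Example (schematic, hypothetical contents): the returned mapping is
    # keyed by the DRBD device's iv_name, e.g.
    #   {"disk/0": (<drbd disk>, [old data LV, old meta LV],
    #               [new data LV, new meta LV])}
    # where the new LVs carry the generated ".diskN_data"/".diskN_meta"
    # suffixes; _ExecDrbd8DiskOnly walks this mapping to detach the old LVs,
    # rename them out of the way and attach the new ones.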
6161

    
6162
  def _CheckDevices(self, node_name, iv_names):
6163
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
6164
      self.cfg.SetDiskID(dev, node_name)
6165

    
6166
      result = self.rpc.call_blockdev_find(node_name, dev)
6167

    
6168
      msg = result.fail_msg
6169
      if msg or not result.payload:
6170
        if not msg:
6171
          msg = "disk not found"
6172
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
6173
                                 (name, msg))
6174

    
6175
      if result.payload.is_degraded:
6176
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6177

    
6178
  def _RemoveOldStorage(self, node_name, iv_names):
6179
    for name, (dev, old_lvs, _) in iv_names.iteritems():
6180
      self.lu.LogInfo("Remove logical volumes for %s" % name)
6181

    
6182
      for lv in old_lvs:
6183
        self.cfg.SetDiskID(lv, node_name)
6184

    
6185
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6186
        if msg:
6187
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
6188
                             hint="remove unused LVs manually")
6189

    
6190
  def _ExecDrbd8DiskOnly(self):
6191
    """Replace a disk on the primary or secondary for DRBD 8.
6192

6193
    The algorithm for replace is quite complicated:
6194

6195
      1. for each disk to be replaced:
6196

6197
        1. create new LVs on the target node with unique names
6198
        1. detach old LVs from the drbd device
6199
        1. rename old LVs to name_replaced.<time_t>
6200
        1. rename new LVs to old LVs
6201
        1. attach the new LVs (with the old names now) to the drbd device
6202

6203
      1. wait for sync across all devices
6204

6205
      1. for each modified disk:
6206

6207
        1. remove old LVs (which have the name name_replaced.<time_t>)
6208

6209
    Failures are not very well handled.
6210

6211
    """
6212
    steps_total = 6
6213

    
6214
    # Step: check device activation
6215
    self.lu.LogStep(1, steps_total, "Check device existence")
6216
    self._CheckDisksExistence([self.other_node, self.target_node])
6217
    self._CheckVolumeGroup([self.target_node, self.other_node])
6218

    
6219
    # Step: check other node consistency
6220
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6221
    self._CheckDisksConsistency(self.other_node,
6222
                                self.other_node == self.instance.primary_node,
6223
                                False)
6224

    
6225
    # Step: create new storage
6226
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6227
    iv_names = self._CreateNewStorage(self.target_node)
6228

    
6229
    # Step: for each lv, detach+rename*2+attach
6230
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6231
    for dev, old_lvs, new_lvs in iv_names.itervalues():
6232
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6233

    
6234
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev, old_lvs)
6235
      result.Raise("Can't detach drbd from local storage on node"
6236
                   " %s for device %s" % (self.target_node, dev.iv_name))
6237
      #dev.children = []
6238
      #cfg.Update(instance)
6239

    
6240
      # ok, we created the new LVs, so now we know we have the needed
6241
      # storage; as such, we proceed on the target node to rename
6242
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6243
      # using the assumption that logical_id == physical_id (which in
6244
      # turn is the unique_id on that node)
6245

    
6246
      # FIXME(iustin): use a better name for the replaced LVs
6247
      temp_suffix = int(time.time())
6248
      ren_fn = lambda d, suff: (d.physical_id[0],
6249
                                d.physical_id[1] + "_replaced-%s" % suff)
6250

    
6251
      # Build the rename list based on what LVs exist on the node
6252
      rename_old_to_new = []
6253
      for to_ren in old_lvs:
6254
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6255
        if not result.fail_msg and result.payload:
6256
          # device exists
6257
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6258

    
6259
      self.lu.LogInfo("Renaming the old LVs on the target node")
6260
      result = self.rpc.call_blockdev_rename(self.target_node, rename_old_to_new)
6261
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
6262

    
6263
      # Now we rename the new LVs to the old LVs
6264
      self.lu.LogInfo("Renaming the new LVs on the target node")
6265
      rename_new_to_old = [(new, old.physical_id)
6266
                           for old, new in zip(old_lvs, new_lvs)]
6267
      result = self.rpc.call_blockdev_rename(self.target_node, rename_new_to_old)
6268
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
6269

    
6270
      for old, new in zip(old_lvs, new_lvs):
6271
        new.logical_id = old.logical_id
6272
        self.cfg.SetDiskID(new, self.target_node)
6273

    
6274
      for disk in old_lvs:
6275
        disk.logical_id = ren_fn(disk, temp_suffix)
6276
        self.cfg.SetDiskID(disk, self.target_node)
6277

    
6278
      # Now that the new lvs have the old name, we can add them to the device
6279
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6280
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev, new_lvs)
6281
      msg = result.fail_msg
6282
      if msg:
6283
        for new_lv in new_lvs:
6284
          msg2 = self.rpc.call_blockdev_remove(self.target_node, new_lv).fail_msg
6285
          if msg2:
6286
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6287
                               hint=("cleanup manually the unused logical"
6288
                                     "volumes"))
6289
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6290

    
6291
      dev.children = new_lvs
6292

    
6293
      self.cfg.Update(self.instance)
6294

    
6295
    # Wait for sync
6296
    # This can fail as the old devices are degraded and _WaitForSync
6297
    # does a combined result over all disks, so we don't check its return value
6298
    self.lu.LogStep(5, steps_total, "Sync devices")
6299
    _WaitForSync(self.lu, self.instance, unlock=True)
6300

    
6301
    # Check all devices manually
6302
    self._CheckDevices(self.instance.primary_node, iv_names)
6303

    
6304
    # Step: remove old storage
6305
    self.lu.LogStep(6, steps_total, "Removing old storage")
6306
    self._RemoveOldStorage(self.target_node, iv_names)
6307

    
6308
  def _ExecDrbd8Secondary(self):
6309
    """Replace the secondary node for DRBD 8.
6310

6311
    The algorithm for replace is quite complicated:
6312
      - for all disks of the instance:
6313
        - create new LVs on the new node with the same names
6314
        - shutdown the drbd device on the old secondary
6315
        - disconnect the drbd network on the primary
6316
        - create the drbd device on the new secondary
6317
        - network attach the drbd on the primary, using an artifice:
6318
          the drbd code for Attach() will connect to the network if it
6319
          finds a device which is connected to the good local disks but
6320
          not network enabled
6321
      - wait for sync across all devices
6322
      - remove all disks from the old secondary
6323

6324
    Failures are not very well handled.
6325

6326
    """
6327
    steps_total = 6
6328

    
6329
    # Step: check device activation
6330
    self.lu.LogStep(1, steps_total, "Check device existence")
6331
    self._CheckDisksExistence([self.instance.primary_node])
6332
    self._CheckVolumeGroup([self.instance.primary_node])
6333

    
6334
    # Step: check other node consistency
6335
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6336
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
6337

    
6338
    # Step: create new storage
6339
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6340
    for idx, dev in enumerate(self.instance.disks):
6341
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6342
                      (self.new_node, idx))
6343
      # we pass force_create=True to force LVM creation
6344
      for new_lv in dev.children:
6345
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6346
                        _GetInstanceInfoText(self.instance), False)
6347

    
6348
    # Step 4: drbd minors and drbd setup changes
6349
    # after this, we must manually remove the drbd minors on both the
6350
    # error and the success paths
6351
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6352
    minors = self.cfg.AllocateDRBDMinor([self.new_node for dev in self.instance.disks],
6353
                                        self.instance.name)
6354
    logging.debug("Allocated minors %r" % (minors,))
6355

    
6356
    iv_names = {}
6357
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6358
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
6359
      # create new devices on new_node; note that we create two IDs:
6360
      # one without port, so the drbd will be activated without
6361
      # networking information on the new node at this stage, and one
6362
      # with network, for the latter activation in step 4
6363
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
6364
      if self.instance.primary_node == o_node1:
6365
        p_minor = o_minor1
6366
      else:
6367
        p_minor = o_minor2
6368

    
6369
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
6370
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
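      # Example (hypothetical values): a DRBD8 logical_id is the 6-tuple
      # (node_a, node_b, port, minor_a, minor_b, secret), so the two IDs
      # built above differ only in the port field:
      #   new_alone_id = ("node1", "node3", None,  11, 12, "secret")
      #   new_net_id   = ("node1", "node3", 11006, 11, 12, "secret")
      # creating the device with the port-less ID keeps it standalone until
      # the primary is re-attached to the network further down.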
6371

    
6372
      iv_names[idx] = (dev, dev.children, new_net_id)
6373
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6374
                    new_net_id)
6375
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6376
                              logical_id=new_alone_id,
6377
                              children=dev.children,
6378
                              size=dev.size)
6379
      try:
6380
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6381
                              _GetInstanceInfoText(self.instance), False)
6382
      except errors.GenericError:
6383
        self.cfg.ReleaseDRBDMinors(self.instance.name)
6384
        raise
6385

    
6386
    # We have new devices, shutdown the drbd on the old secondary
6387
    for idx, dev in enumerate(self.instance.disks):
6388
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
6389
      self.cfg.SetDiskID(dev, self.target_node)
6390
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
6391
      if msg:
6392
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
6393
                           "node: %s" % (idx, msg),
6394
                           hint=("Please cleanup this device manually as"
6395
                                 " soon as possible"))
6396

    
6397
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6398
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node], self.node_secondary_ip,
6399
                                               self.instance.disks)[self.instance.primary_node]
6400

    
6401
    msg = result.fail_msg
6402
    if msg:
6403
      # detaches didn't succeed (unlikely)
6404
      self.cfg.ReleaseDRBDMinors(self.instance.name)
6405
      raise errors.OpExecError("Can't detach the disks from the network on"
6406
                               " old node: %s" % (msg,))
6407

    
6408
    # if we managed to detach at least one, we update all the disks of
6409
    # the instance to point to the new secondary
6410
    self.lu.LogInfo("Updating instance configuration")
6411
    for dev, _, new_logical_id in iv_names.itervalues():
6412
      dev.logical_id = new_logical_id
6413
      self.cfg.SetDiskID(dev, self.instance.primary_node)
6414

    
6415
    self.cfg.Update(self.instance)
6416

    
6417
    # and now perform the drbd attach
6418
    self.lu.LogInfo("Attaching primary drbds to new secondary"
6419
                    " (standalone => connected)")
6420
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node, self.new_node], self.node_secondary_ip,
6421
                                           self.instance.disks, self.instance.name,
6422
                                           False)
6423
    for to_node, to_result in result.items():
6424
      msg = to_result.fail_msg
6425
      if msg:
6426
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
6427
                           hint=("please do a gnt-instance info to see the"
6428
                                 " status of disks"))
6429

    
6430
    # Wait for sync
6431
    # This can fail as the old devices are degraded and _WaitForSync
6432
    # does a combined result over all disks, so we don't check its return value
6433
    self.lu.LogStep(5, steps_total, "Sync devices")
6434
    _WaitForSync(self.lu, self.instance, unlock=True)
6435

    
6436
    # Check all devices manually
6437
    self._CheckDevices(self.instance.primary_node, iv_names)
6438

    
6439
    # Step: remove old storage
6440
    self.lu.LogStep(6, steps_total, "Removing old storage")
6441
    self._RemoveOldStorage(self.target_node, iv_names)
6442

    
6443

    
6444
class LURepairNodeStorage(NoHooksLU):
6445
  """Repairs the volume group on a node.
6446

6447
  """
6448
  _OP_REQP = ["node_name"]
6449
  REQ_BGL = False
6450

    
6451
  def CheckArguments(self):
6452
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
6453
    if node_name is None:
6454
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
6455

    
6456
    self.op.node_name = node_name
6457

    
6458
  def ExpandNames(self):
6459
    self.needed_locks = {
6460
      locking.LEVEL_NODE: [self.op.node_name],
6461
      }
6462

    
6463
  def _CheckFaultyDisks(self, instance, node_name):
6464
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
6465
                                node_name, True):
6466
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
6467
                                 " node '%s'" % (inst.name, node_name))
6468

    
6469
  def CheckPrereq(self):
6470
    """Check prerequisites.
6471

6472
    """
6473
    storage_type = self.op.storage_type
6474

    
6475
    if (constants.SO_FIX_CONSISTENCY not in
6476
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
6477
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
6478
                                 " repaired" % storage_type)
6479

    
6480
    # Check whether any instance on this node has faulty disks
6481
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
6482
      check_nodes = set(inst.all_nodes)
6483
      check_nodes.discard(self.op.node_name)
6484
      for inst_node_name in check_nodes:
6485
        self._CheckFaultyDisks(inst, inst_node_name)
6486

    
6487
  def Exec(self, feedback_fn):
6488
    feedback_fn("Repairing storage unit '%s' on %s ..." %
6489
                (self.op.name, self.op.node_name))
6490

    
6491
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
6492
    result = self.rpc.call_storage_execute(self.op.node_name,
6493
                                           self.op.storage_type, st_args,
6494
                                           self.op.name,
6495
                                           constants.SO_FIX_CONSISTENCY)
6496
    result.Raise("Failed to repair storage unit '%s' on %s" %
6497
                 (self.op.name, self.op.node_name))
6498

    
6499

    
6500
class LUGrowDisk(LogicalUnit):
6501
  """Grow a disk of an instance.
6502

6503
  """
6504
  HPATH = "disk-grow"
6505
  HTYPE = constants.HTYPE_INSTANCE
6506
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
6507
  REQ_BGL = False
6508

    
6509
  def ExpandNames(self):
6510
    self._ExpandAndLockInstance()
6511
    self.needed_locks[locking.LEVEL_NODE] = []
6512
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6513

    
6514
  def DeclareLocks(self, level):
6515
    if level == locking.LEVEL_NODE:
6516
      self._LockInstancesNodes()
6517

    
6518
  def BuildHooksEnv(self):
6519
    """Build hooks env.
6520

6521
    This runs on the master, the primary and all the secondaries.
6522

6523
    """
6524
    env = {
6525
      "DISK": self.op.disk,
6526
      "AMOUNT": self.op.amount,
6527
      }
6528
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6529
    nl = [
6530
      self.cfg.GetMasterNode(),
6531
      self.instance.primary_node,
6532
      ]
6533
    return env, nl, nl
6534

    
6535
  def CheckPrereq(self):
6536
    """Check prerequisites.
6537

6538
    This checks that the instance is in the cluster.
6539

6540
    """
6541
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6542
    assert instance is not None, \
6543
      "Cannot retrieve locked instance %s" % self.op.instance_name
6544
    nodenames = list(instance.all_nodes)
6545
    for node in nodenames:
6546
      _CheckNodeOnline(self, node)
6547

    
6548

    
6549
    self.instance = instance
6550

    
6551
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
6552
      raise errors.OpPrereqError("Instance's disk layout does not support"
6553
                                 " growing.")
6554

    
6555
    self.disk = instance.FindDisk(self.op.disk)
6556

    
6557
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6558
                                       instance.hypervisor)
6559
    for node in nodenames:
6560
      info = nodeinfo[node]
6561
      info.Raise("Cannot get current information from node %s" % node)
6562
      vg_free = info.payload.get('vg_free', None)
6563
      if not isinstance(vg_free, int):
6564
        raise errors.OpPrereqError("Can't compute free disk space on"
6565
                                   " node %s" % node)
6566
      if self.op.amount > vg_free:
6567
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
6568
                                   " %d MiB available, %d MiB required" %
6569
                                   (node, vg_free, self.op.amount))
6570

    
6571
  def Exec(self, feedback_fn):
6572
    """Execute disk grow.
6573

6574
    """
6575
    instance = self.instance
6576
    disk = self.disk
6577
    for node in instance.all_nodes:
6578
      self.cfg.SetDiskID(disk, node)
6579
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
6580
      result.Raise("Grow request failed to node %s" % node)
6581
    disk.RecordGrow(self.op.amount)
6582
    self.cfg.Update(instance)
6583
    if self.op.wait_for_sync:
6584
      disk_abort = not _WaitForSync(self, instance)
6585
      if disk_abort:
6586
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
6587
                             " status.\nPlease check the instance.")
6588

    
6589

    
6590
class LUQueryInstanceData(NoHooksLU):
6591
  """Query runtime instance data.
6592

6593
  """
6594
  _OP_REQP = ["instances", "static"]
6595
  REQ_BGL = False
6596

    
6597
  def ExpandNames(self):
6598
    self.needed_locks = {}
6599
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6600

    
6601
    if not isinstance(self.op.instances, list):
6602
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6603

    
6604
    if self.op.instances:
6605
      self.wanted_names = []
6606
      for name in self.op.instances:
6607
        full_name = self.cfg.ExpandInstanceName(name)
6608
        if full_name is None:
6609
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6610
        self.wanted_names.append(full_name)
6611
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6612
    else:
6613
      self.wanted_names = None
6614
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6615

    
6616
    self.needed_locks[locking.LEVEL_NODE] = []
6617
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6618

    
6619
  def DeclareLocks(self, level):
6620
    if level == locking.LEVEL_NODE:
6621
      self._LockInstancesNodes()
6622

    
6623
  def CheckPrereq(self):
6624
    """Check prerequisites.
6625

6626
    This only checks the optional instance list against the existing names.
6627

6628
    """
6629
    if self.wanted_names is None:
6630
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6631

    
6632
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6633
                             in self.wanted_names]
6634
    return
6635

    
6636
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
6637
    """Returns the status of a block device
6638

6639
    """
6640
    if self.op.static:
6641
      return None
6642

    
6643
    self.cfg.SetDiskID(dev, node)
6644

    
6645
    result = self.rpc.call_blockdev_find(node, dev)
6646
    if result.offline:
6647
      return None
6648

    
6649
    result.Raise("Can't compute disk status for %s" % instance_name)
6650

    
6651
    status = result.payload
6652
    if status is None:
6653
      return None
6654

    
6655
    return (status.dev_path, status.major, status.minor,
6656
            status.sync_percent, status.estimated_time,
6657
            status.is_degraded, status.ldisk_status)
6658

    
6659
  def _ComputeDiskStatus(self, instance, snode, dev):
6660
    """Compute block device status.
6661

6662
    """
6663
    if dev.dev_type in constants.LDS_DRBD:
6664
      # we change the snode then (otherwise we use the one passed in)
6665
      if dev.logical_id[0] == instance.primary_node:
6666
        snode = dev.logical_id[1]
6667
      else:
6668
        snode = dev.logical_id[0]
6669

    
6670
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
6671
                                              instance.name, dev)
6672
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
6673

    
6674
    if dev.children:
6675
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
6676
                      for child in dev.children]
6677
    else:
6678
      dev_children = []
6679

    
6680
    data = {
6681
      "iv_name": dev.iv_name,
6682
      "dev_type": dev.dev_type,
6683
      "logical_id": dev.logical_id,
6684
      "physical_id": dev.physical_id,
6685
      "pstatus": dev_pstatus,
6686
      "sstatus": dev_sstatus,
6687
      "children": dev_children,
6688
      "mode": dev.mode,
6689
      "size": dev.size,
6690
      }
6691

    
6692
    return data
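    # Example (schematic, hypothetical values): for a DRBD disk the returned
    # dict looks roughly like
    #   {"iv_name": "disk/0", "dev_type": constants.LD_DRBD8,
    #    "logical_id": (...), "physical_id": (...),
    #    "pstatus": ("/dev/drbd0", 147, 0, 90.5, 42, True, None),
    #    "sstatus": (...),        # same 7-tuple shape, or None when static
    #    "children": [...],       # the backing LVs, same dict shape
    #    "mode": "rw", "size": 10240}
    # where the 7-tuple fields are (dev_path, major, minor, sync_percent,
    # estimated_time, is_degraded, ldisk_status) as built by
    # _ComputeBlockdevStatus.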
6693

    
6694
  def Exec(self, feedback_fn):
6695
    """Gather and return data"""
6696
    result = {}
6697

    
6698
    cluster = self.cfg.GetClusterInfo()
6699

    
6700
    for instance in self.wanted_instances:
6701
      if not self.op.static:
6702
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6703
                                                  instance.name,
6704
                                                  instance.hypervisor)
6705
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6706
        remote_info = remote_info.payload
6707
        if remote_info and "state" in remote_info:
6708
          remote_state = "up"
6709
        else:
6710
          remote_state = "down"
6711
      else:
6712
        remote_state = None
6713
      if instance.admin_up:
6714
        config_state = "up"
6715
      else:
6716
        config_state = "down"
6717

    
6718
      disks = [self._ComputeDiskStatus(instance, None, device)
6719
               for device in instance.disks]
6720

    
6721
      idict = {
6722
        "name": instance.name,
6723
        "config_state": config_state,
6724
        "run_state": remote_state,
6725
        "pnode": instance.primary_node,
6726
        "snodes": instance.secondary_nodes,
6727
        "os": instance.os,
6728
        # this happens to be the same format used for hooks
6729
        "nics": _NICListToTuple(self, instance.nics),
6730
        "disks": disks,
6731
        "hypervisor": instance.hypervisor,
6732
        "network_port": instance.network_port,
6733
        "hv_instance": instance.hvparams,
6734
        "hv_actual": cluster.FillHV(instance),
6735
        "be_instance": instance.beparams,
6736
        "be_actual": cluster.FillBE(instance),
6737
        }
6738

    
6739
      result[instance.name] = idict
6740

    
6741
    return result
6742

    
6743

    
6744
class LUSetInstanceParams(LogicalUnit):
6745
  """Modifies an instances's parameters.
6746

6747
  """
6748
  HPATH = "instance-modify"
6749
  HTYPE = constants.HTYPE_INSTANCE
6750
  _OP_REQP = ["instance_name"]
6751
  REQ_BGL = False
6752

    
6753
  def CheckArguments(self):
6754
    if not hasattr(self.op, 'nics'):
6755
      self.op.nics = []
6756
    if not hasattr(self.op, 'disks'):
6757
      self.op.disks = []
6758
    if not hasattr(self.op, 'beparams'):
6759
      self.op.beparams = {}
6760
    if not hasattr(self.op, 'hvparams'):
6761
      self.op.hvparams = {}
6762
    self.op.force = getattr(self.op, "force", False)
6763
    if not (self.op.nics or self.op.disks or
6764
            self.op.hvparams or self.op.beparams):
6765
      raise errors.OpPrereqError("No changes submitted")
6766

    
6767
    # Disk validation
6768
    disk_addremove = 0
6769
    for disk_op, disk_dict in self.op.disks:
6770
      if disk_op == constants.DDM_REMOVE:
6771
        disk_addremove += 1
6772
        continue
6773
      elif disk_op == constants.DDM_ADD:
6774
        disk_addremove += 1
6775
      else:
6776
        if not isinstance(disk_op, int):
6777
          raise errors.OpPrereqError("Invalid disk index")
6778
        if not isinstance(disk_dict, dict):
6779
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
6780
          raise errors.OpPrereqError(msg)
6781

    
6782
      if disk_op == constants.DDM_ADD:
6783
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
6784
        if mode not in constants.DISK_ACCESS_SET:
6785
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
6786
        size = disk_dict.get('size', None)
6787
        if size is None:
6788
          raise errors.OpPrereqError("Required disk parameter size missing")
6789
        try:
6790
          size = int(size)
6791
        except ValueError, err:
6792
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
6793
                                     str(err))
6794
        disk_dict['size'] = size
6795
      else:
6796
        # modification of disk
6797
        if 'size' in disk_dict:
6798
          raise errors.OpPrereqError("Disk size change not possible, use"
6799
                                     " grow-disk")
6800

    
6801
    if disk_addremove > 1:
6802
      raise errors.OpPrereqError("Only one disk add or remove operation"
6803
                                 " supported at a time")
6804

    
6805
    # NIC validation
6806
    nic_addremove = 0
6807
    for nic_op, nic_dict in self.op.nics:
6808
      if nic_op == constants.DDM_REMOVE:
6809
        nic_addremove += 1
6810
        continue
6811
      elif nic_op == constants.DDM_ADD:
6812
        nic_addremove += 1
6813
      else:
6814
        if not isinstance(nic_op, int):
6815
          raise errors.OpPrereqError("Invalid nic index")
6816
        if not isinstance(nic_dict, dict):
6817
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
6818
          raise errors.OpPrereqError(msg)
6819

    
6820
      # nic_dict should be a dict
6821
      nic_ip = nic_dict.get('ip', None)
6822
      if nic_ip is not None:
6823
        if nic_ip.lower() == constants.VALUE_NONE:
6824
          nic_dict['ip'] = None
6825
        else:
6826
          if not utils.IsValidIP(nic_ip):
6827
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
6828

    
6829
      nic_bridge = nic_dict.get('bridge', None)
6830
      nic_link = nic_dict.get('link', None)
6831
      if nic_bridge and nic_link:
6832
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6833
                                   " at the same time")
6834
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
6835
        nic_dict['bridge'] = None
6836
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
6837
        nic_dict['link'] = None
6838

    
6839
      if nic_op == constants.DDM_ADD:
6840
        nic_mac = nic_dict.get('mac', None)
6841
        if nic_mac is None:
6842
          nic_dict['mac'] = constants.VALUE_AUTO
6843

    
6844
      if 'mac' in nic_dict:
6845
        nic_mac = nic_dict['mac']
6846
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6847
          if not utils.IsValidMac(nic_mac):
6848
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
6849
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
6850
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
6851
                                     " modifying an existing nic")
6852

    
6853
    if nic_addremove > 1:
6854
      raise errors.OpPrereqError("Only one NIC add or remove operation"
6855
                                 " supported at a time")
6856

    
6857
  def ExpandNames(self):
6858
    self._ExpandAndLockInstance()
6859
    self.needed_locks[locking.LEVEL_NODE] = []
6860
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6861

    
6862
  def DeclareLocks(self, level):
6863
    if level == locking.LEVEL_NODE:
6864
      self._LockInstancesNodes()
6865

    
6866
  def BuildHooksEnv(self):
6867
    """Build hooks env.
6868

6869
    This runs on the master, primary and secondaries.
6870

6871
    """
6872
    args = dict()
6873
    if constants.BE_MEMORY in self.be_new:
6874
      args['memory'] = self.be_new[constants.BE_MEMORY]
6875
    if constants.BE_VCPUS in self.be_new:
6876
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
6877
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
6878
    # information at all.
6879
    if self.op.nics:
6880
      args['nics'] = []
6881
      nic_override = dict(self.op.nics)
6882
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
6883
      for idx, nic in enumerate(self.instance.nics):
6884
        if idx in nic_override:
6885
          this_nic_override = nic_override[idx]
6886
        else:
6887
          this_nic_override = {}
6888
        if 'ip' in this_nic_override:
6889
          ip = this_nic_override['ip']
6890
        else:
6891
          ip = nic.ip
6892
        if 'mac' in this_nic_override:
6893
          mac = this_nic_override['mac']
6894
        else:
6895
          mac = nic.mac
6896
        if idx in self.nic_pnew:
6897
          nicparams = self.nic_pnew[idx]
6898
        else:
6899
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6900
        mode = nicparams[constants.NIC_MODE]
6901
        link = nicparams[constants.NIC_LINK]
6902
        args['nics'].append((ip, mac, mode, link))
6903
      if constants.DDM_ADD in nic_override:
6904
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6905
        mac = nic_override[constants.DDM_ADD]['mac']
6906
        nicparams = self.nic_pnew[constants.DDM_ADD]
6907
        mode = nicparams[constants.NIC_MODE]
6908
        link = nicparams[constants.NIC_LINK]
6909
        args['nics'].append((ip, mac, mode, link))
6910
      elif constants.DDM_REMOVE in nic_override:
6911
        del args['nics'][-1]
6912

    
6913
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6914
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6915
    return env, nl, nl
6916

    
6917
  def _GetUpdatedParams(self, old_params, update_dict,
6918
                        default_values, parameter_types):
6919
    """Return the new params dict for the given params.
6920

6921
    @type old_params: dict
6922
    @param old_params: old parameters
6923
    @type update_dict: dict
6924
    @param update_dict: dict containing new parameter values,
6925
                        or constants.VALUE_DEFAULT to reset the
6926
                        parameter to its default value
6927
    @type default_values: dict
6928
    @param default_values: default values for the filled parameters
6929
    @type parameter_types: dict
6930
    @param parameter_types: dict mapping target dict keys to types
6931
                            in constants.ENFORCEABLE_TYPES
6932
    @rtype: (dict, dict)
6933
    @return: (new_parameters, filled_parameters)
6934

6935
    """
6936
    params_copy = copy.deepcopy(old_params)
6937
    for key, val in update_dict.iteritems():
6938
      if val == constants.VALUE_DEFAULT:
6939
        try:
6940
          del params_copy[key]
6941
        except KeyError:
6942
          pass
6943
      else:
6944
        params_copy[key] = val
6945
    utils.ForceDictType(params_copy, parameter_types)
6946
    params_filled = objects.FillDict(default_values, params_copy)
6947
    return (params_copy, params_filled)
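    # Worked example (hypothetical values): with
    #   old_params     = {"memory": 512, "vcpus": 2}
    #   update_dict    = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
    #   default_values = {"memory": 128, "vcpus": 1}
    # the method returns
    #   ({"vcpus": 4},                   # "memory" reset to its default
    #    {"memory": 128, "vcpus": 4})    # defaults merged back in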
6948

    
6949
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

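    # Worked example (illustrative, not in the original code): requesting
    # 2048 MB of memory for an instance currently using 512 MB, on a primary
    # node reporting 1024 MB free, gives miss_mem = 2048 - 512 - 1024 = 512,
    # so the prerequisite check above fails; passing force skips this memory
    # check entirely.
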
    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics)))
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic')
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None')
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC()
        else:
          # or validate/reserve the current one
          if self.cfg.IsMacInUse(nic_mac):
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances")
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance")
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg))
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)))

    return

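  # Illustrative note, not part of the original module: self.op.disks and
  # self.op.nics are lists of (op, params) pairs, where op is either
  # constants.DDM_ADD, constants.DDM_REMOVE or an index into the existing
  # devices; e.g. a hypothetical [(constants.DDM_ADD, {'size': 1024,
  # 'mode': 'w'})] asks for one additional writable disk, while
  # [(0, {'mode': 'r'})] switches the first disk to read-only mode.
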
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    cluster = self.cluster
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pnew:
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance)

    return result


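# Illustrative note, not part of the original module: the Exec method above
# returns a list of (parameter, new value) pairs describing the applied
# changes, e.g. hypothetically [("disk/1", "add:size=1024,mode=w"),
# ("be/memory", 2048)], which the caller can show as feedback.
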
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


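# Illustrative sketch with hypothetical names: LUQueryExports.Exec maps every
# queried node either to the list of exports found there or to False when the
# node could not be contacted, e.g.
#   {"node1.example.com": ["inst1.example.com"], "node2.example.com": False}
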
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove the old export afterwards
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This means a wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    # per-disk results
    dresults = []
    try:
      for idx, disk in enumerate(instance.disks):
        # result.payload will be a snapshot of an lvm leaf of the one we passed
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                          idx, src_node, msg)
          snap_disks.append(False)
        else:
          disk_id = (vgname, result.payload)
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=disk_id, physical_id=disk_id,
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      if self.op.shutdown and instance.admin_up:
        result = self.rpc.call_instance_start(src_node, instance, None, None)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
          dresults.append(False)
        else:
          dresults.append(True)
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)
      else:
        dresults.append(False)

    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    fin_resu = True
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)
      fin_resu = False

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults


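# Illustrative note, not part of the original module: LUExportInstance.Exec
# returns a (fin_resu, dresults) pair: fin_resu tells whether the export could
# be finalized on the target node, and dresults contains one boolean per
# instance disk, e.g. a hypothetical (True, [True, False]) means the second
# disk could not be snapshotted or copied.
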
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


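# Illustrative note, not part of the original module: self.op.kind selects the
# tag target (constants.TAG_CLUSTER, constants.TAG_NODE or
# constants.TAG_INSTANCE), and TagsLU.CheckPrereq stores the corresponding
# configuration object in self.target for the concrete tag LUs below.
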
class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the matching (path, tag) pairs.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


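# Illustrative sketch with hypothetical names: LUSearchTags.Exec returns the
# matching (path, tag) pairs; searching for "^db" on a cluster where instance
# db1.example.com carries the tag "dbserver" would yield something like
#   [("/instances/db1.example.com", "dbserver")]
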
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _ALLO_KEYS or _RELO_KEYS class
      attributes, depending on the mode, are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

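  # Illustrative sketch with hypothetical values: once _ComputeClusterData and
  # one of the _Add*Instance methods below have run, self.in_data looks
  # roughly like
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [],
  #    "enabled_hypervisors": ["xen-pvm"],
  #    "nodes": {"node1.example.com": {"total_memory": 4096, ...}, ...},
  #    "instances": {"inst1.example.com": {"memory": 512, ...}, ...},
  #    "request": {...}}
  # and is serialized into self.in_text before being handed to the external
  # allocator script.
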
  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


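# Illustrative sketch with hypothetical values: the external allocator is
# expected to answer with a dict carrying at least "success", "info" and
# "nodes", e.g. something like
#   {"success": True, "info": "allocation successful",
#    "nodes": ["node2.example.com", "node3.example.com"]}
# (as serialized text parsed by serializer.Load); after _ValidateResult the
# corresponding attributes (ial.success, ial.info, ial.nodes) are set on the
# IAllocator object, as exercised by LUTestAllocator below.
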
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode of
    the test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result