lib/cmdlib.py @ 621b7678

1
#
2
#
3

    
4
# Copyright (C) 2006, 2007, 2008 Google Inc.
5
#
6
# This program is free software; you can redistribute it and/or modify
7
# it under the terms of the GNU General Public License as published by
8
# the Free Software Foundation; either version 2 of the License, or
9
# (at your option) any later version.
10
#
11
# This program is distributed in the hope that it will be useful, but
12
# WITHOUT ANY WARRANTY; without even the implied warranty of
13
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
# General Public License for more details.
15
#
16
# You should have received a copy of the GNU General Public License
17
# along with this program; if not, write to the Free Software
18
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19
# 02110-1301, USA.
20

    
21

    
22
"""Module implementing the master-side code."""
23

    
24
# pylint: disable-msg=W0613,W0201
25

    
26
import os
27
import os.path
28
import time
29
import re
30
import platform
31
import logging
32
import copy
33

    
34
from ganeti import ssh
35
from ganeti import utils
36
from ganeti import errors
37
from ganeti import hypervisor
38
from ganeti import locking
39
from ganeti import constants
40
from ganeti import objects
41
from ganeti import serializer
42
from ganeti import ssconf
43

    
44

    
45
class LogicalUnit(object):
46
  """Logical Unit base class.
47

48
  Subclasses must follow these rules:
49
    - implement ExpandNames
50
    - implement CheckPrereq (except when tasklets are used)
51
    - implement Exec (except when tasklets are used)
52
    - implement BuildHooksEnv
53
    - redefine HPATH and HTYPE
54
    - optionally redefine their run requirements:
55
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
56

57
  Note that all commands require root permissions.
58

59
  @ivar dry_run_result: the value (if any) that will be returned to the caller
60
      in dry-run mode (signalled by opcode dry_run parameter)
61

62
  """
63
  HPATH = None
64
  HTYPE = None
65
  _OP_REQP = []
66
  REQ_BGL = True
67

    
68
  def __init__(self, processor, op, context, rpc):
69
    """Constructor for LogicalUnit.
70

71
    This needs to be overridden in derived classes in order to check op
72
    validity.
73

74
    """
75
    self.proc = processor
76
    self.op = op
77
    self.cfg = context.cfg
78
    self.context = context
79
    self.rpc = rpc
80
    # Dicts used to declare locking needs to mcpu
81
    self.needed_locks = None
82
    self.acquired_locks = {}
83
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
84
    self.add_locks = {}
85
    self.remove_locks = {}
86
    # Used to force good behavior when calling helper functions
87
    self.recalculate_locks = {}
88
    self.__ssh = None
89
    # logging
90
    self.LogWarning = processor.LogWarning
91
    self.LogInfo = processor.LogInfo
92
    self.LogStep = processor.LogStep
93
    # support for dry-run
94
    self.dry_run_result = None
95

    
96
    # Tasklets
97
    self.tasklets = None
98

    
99
    for attr_name in self._OP_REQP:
100
      attr_val = getattr(op, attr_name, None)
101
      if attr_val is None:
102
        raise errors.OpPrereqError("Required parameter '%s' missing" %
103
                                   attr_name)
104

    
105
    self.CheckArguments()
106

    
107
  def __GetSSH(self):
108
    """Returns the SshRunner object
109

110
    """
111
    if not self.__ssh:
112
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
113
    return self.__ssh
114

    
115
  ssh = property(fget=__GetSSH)
116

    
117
  def CheckArguments(self):
118
    """Check syntactic validity for the opcode arguments.
119

120
    This method is for doing a simple syntactic check and ensuring the
121
    validity of opcode parameters, without any cluster-related
122
    checks. While the same can be accomplished in ExpandNames and/or
123
    CheckPrereq, doing these separate is better because:
124

125
      - ExpandNames is left as purely a lock-related function
126
      - CheckPrereq is run after we have acquired locks (and possibly
127
        waited for them)
128

129
    The function is allowed to change the self.op attribute so that
130
    later methods no longer need to worry about missing parameters.
131

132
    """
133
    pass
134

    
135
  def ExpandNames(self):
136
    """Expand names for this LU.
137

138
    This method is called before starting to execute the opcode, and it should
139
    update all the parameters of the opcode to their canonical form (e.g. a
140
    short node name must be fully expanded after this method has successfully
141
    completed). This way locking, hooks, logging, etc. can work correctly.
142

143
    LUs which implement this method must also populate the self.needed_locks
144
    member, as a dict with lock levels as keys, and a list of needed lock names
145
    as values. Rules:
146

147
      - use an empty dict if you don't need any lock
148
      - if you don't need any lock at a particular level omit that level
149
      - don't put anything for the BGL level
150
      - if you want all locks at a level use locking.ALL_SET as a value
151

152
    If you need to share locks (rather than acquire them exclusively) at one
153
    level you can modify self.share_locks, setting a true value (usually 1) for
154
    that level. By default locks are not shared.
155

156
    This function can also define a list of tasklets, which then will be
157
    executed in order instead of the usual LU-level CheckPrereq and Exec
158
    functions, if those are not defined by the LU.
159

160
    Examples::
161

162
      # Acquire all nodes and one instance
163
      self.needed_locks = {
164
        locking.LEVEL_NODE: locking.ALL_SET,
165
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
166
      }
167
      # Acquire just two nodes
168
      self.needed_locks = {
169
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
170
      }
171
      # Acquire no locks
172
      self.needed_locks = {} # No, you can't leave it to the default value None
173

174
    """
175
    # The implementation of this method is mandatory only if the new LU is
176
    # concurrent, so that old LUs don't need to be changed all at the same
177
    # time.
178
    if self.REQ_BGL:
179
      self.needed_locks = {} # Exclusive LUs don't need locks.
180
    else:
181
      raise NotImplementedError
182

    
183
  def DeclareLocks(self, level):
184
    """Declare LU locking needs for a level
185

186
    While most LUs can just declare their locking needs at ExpandNames time,
187
    sometimes there's the need to calculate some locks after having acquired
188
    the ones before. This function is called just before acquiring locks at a
189
    particular level, but after acquiring the ones at lower levels, and permits
190
    such calculations. It can be used to modify self.needed_locks, and by
191
    default it does nothing.
192

193
    This function is only called if you have something already set in
194
    self.needed_locks for the level.
195

196
    @param level: Locking level which is going to be locked
197
    @type level: member of ganeti.locking.LEVELS
198

199
    """
200

    
201
  def CheckPrereq(self):
202
    """Check prerequisites for this LU.
203

204
    This method should check that the prerequisites for the execution
205
    of this LU are fulfilled. It can do internode communication, but
206
    it should be idempotent - no cluster or system changes are
207
    allowed.
208

209
    The method should raise errors.OpPrereqError in case something is
210
    not fulfilled. Its return value is ignored.
211

212
    This method should also update all the parameters of the opcode to
213
    their canonical form if it hasn't been done by ExpandNames before.
214

215
    """
216
    if self.tasklets is not None:
217
      for (idx, tl) in enumerate(self.tasklets):
218
        logging.debug("Checking prerequisites for tasklet %s/%s",
219
                      idx + 1, len(self.tasklets))
220
        tl.CheckPrereq()
221
    else:
222
      raise NotImplementedError
223

    
224
  def Exec(self, feedback_fn):
225
    """Execute the LU.
226

227
    This method should implement the actual work. It should raise
228
    errors.OpExecError for failures that are somewhat dealt with in
229
    code, or expected.
230

231
    """
232
    if self.tasklets is not None:
233
      for (idx, tl) in enumerate(self.tasklets):
234
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
235
        tl.Exec(feedback_fn)
236
    else:
237
      raise NotImplementedError
238

    
239
  def BuildHooksEnv(self):
240
    """Build hooks environment for this LU.
241

242
    This method should return a three-element tuple consisting of: a dict
243
    containing the environment that will be used for running the
244
    specific hook for this LU, a list of node names on which the hook
245
    should run before the execution, and a list of node names on which
246
    the hook should run after the execution.
247

248
    The keys of the dict must not have 'GANETI_' prefixed as this will
249
    be handled in the hooks runner. Also note additional keys will be
250
    added by the hooks runner. If the LU doesn't define any
251
    environment, an empty dict (and not None) should be returned.
252

253
    No nodes should be returned as an empty list (and not None).
254

255
    Note that if the HPATH for a LU class is None, this function will
256
    not be called.
257

258
    """
259
    raise NotImplementedError
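    # Illustrative sketch only: a concrete BuildHooksEnv builds the
    # environment dict and returns it together with the pre- and post-hook
    # node lists, e.g. (compare LUPostInitCluster below):
    #
    #   def BuildHooksEnv(self):
    #     env = {"OP_TARGET": self.cfg.GetClusterName()}
    #     mn = self.cfg.GetMasterNode()
    #     return env, [], [mn]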
260

    
261
  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
262
    """Notify the LU about the results of its hooks.
263

264
    This method is called every time a hooks phase is executed, and notifies
265
    the Logical Unit about the hooks' result. The LU can then use it to alter
266
    its result based on the hooks.  By default the method does nothing and the
267
    previous result is passed back unchanged but any LU can define it if it
268
    wants to use the local cluster hook-scripts somehow.
269

270
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
271
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
272
    @param hook_results: the results of the multi-node hooks rpc call
273
    @param feedback_fn: function used to send feedback back to the caller
274
    @param lu_result: the previous Exec result this LU had, or None
275
        in the PRE phase
276
    @return: the new Exec result, based on the previous result
277
        and hook results
278

279
    """
280
    return lu_result
281

    
282
  def _ExpandAndLockInstance(self):
283
    """Helper function to expand and lock an instance.
284

285
    Many LUs that work on an instance take its name in self.op.instance_name
286
    and need to expand it and then declare the expanded name for locking. This
287
    function does it, and then updates self.op.instance_name to the expanded
288
    name. It also initializes needed_locks as a dict, if this hasn't been done
289
    before.
290

291
    """
292
    if self.needed_locks is None:
293
      self.needed_locks = {}
294
    else:
295
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
296
        "_ExpandAndLockInstance called with instance-level locks set"
297
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
298
    if expanded_name is None:
299
      raise errors.OpPrereqError("Instance '%s' not known" %
300
                                  self.op.instance_name)
301
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
302
    self.op.instance_name = expanded_name
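    # Typical use from an instance-level LU (sketch, not original code):
    #
    #   def ExpandNames(self):
    #     self._ExpandAndLockInstance()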
303

    
304
  def _LockInstancesNodes(self, primary_only=False):
305
    """Helper function to declare instances' nodes for locking.
306

307
    This function should be called after locking one or more instances to lock
308
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
309
    with all primary or secondary nodes for instances already locked and
310
    present in self.needed_locks[locking.LEVEL_INSTANCE].
311

312
    It should be called from DeclareLocks, and for safety only works if
313
    self.recalculate_locks[locking.LEVEL_NODE] is set.
314

315
    In the future it may grow parameters to just lock some instance's nodes, or
316
    to just lock primaries or secondary nodes, if needed.
317

318
    It should be called in DeclareLocks in a way similar to::
319

320
      if level == locking.LEVEL_NODE:
321
        self._LockInstancesNodes()
322

323
    @type primary_only: boolean
324
    @param primary_only: only lock primary nodes of locked instances
325

326
    """
327
    assert locking.LEVEL_NODE in self.recalculate_locks, \
328
      "_LockInstancesNodes helper function called with no nodes to recalculate"
329

    
330
    # TODO: check if we've really been called with the instance locks held
331

    
332
    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
333
    # future we might want to have different behaviors depending on the value
334
    # of self.recalculate_locks[locking.LEVEL_NODE]
335
    wanted_nodes = []
336
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
337
      instance = self.context.cfg.GetInstanceInfo(instance_name)
338
      wanted_nodes.append(instance.primary_node)
339
      if not primary_only:
340
        wanted_nodes.extend(instance.secondary_nodes)
341

    
342
    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
343
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
344
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
345
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
346

    
347
    del self.recalculate_locks[locking.LEVEL_NODE]
348

    
349

    
350
class NoHooksLU(LogicalUnit):
351
  """Simple LU which runs no hooks.
352

353
  This LU is intended as a parent for other LogicalUnits which will
354
  run no hooks, in order to reduce duplicate code.
355

356
  """
357
  HPATH = None
358
  HTYPE = None
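  # Hedged illustration (not in the original source): a subclass of NoHooksLU
  # only needs to provide locking and execution logic, along the lines of:
  #
  #   class LUHypotheticalNoop(NoHooksLU):
  #     _OP_REQP = []
  #     REQ_BGL = False
  #
  #     def ExpandNames(self):
  #       self.needed_locks = {}
  #
  #     def CheckPrereq(self):
  #       pass
  #
  #     def Exec(self, feedback_fn):
  #       return True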
359

    
360

    
361
class Tasklet:
362
  """Tasklet base class.
363

364
  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
365
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
366
  tasklets know nothing about locks.
367

368
  Subclasses must follow these rules:
369
    - Implement CheckPrereq
370
    - Implement Exec
371

372
  """
373
  def __init__(self, lu):
374
    self.lu = lu
375

    
376
    # Shortcuts
377
    self.cfg = lu.cfg
378
    self.rpc = lu.rpc
379

    
380
  def CheckPrereq(self):
381
    """Check prerequisites for this tasklets.
382

383
    This method should check whether the prerequisites for the execution of
384
    this tasklet are fulfilled. It can do internode communication, but it
385
    should be idempotent - no cluster or system changes are allowed.
386

387
    The method should raise errors.OpPrereqError in case something is not
388
    fulfilled. Its return value is ignored.
389

390
    This method should also update all parameters to their canonical form if it
391
    hasn't been done before.
392

393
    """
394
    raise NotImplementedError
395

    
396
  def Exec(self, feedback_fn):
397
    """Execute the tasklet.
398

399
    This method should implement the actual work. It should raise
400
    errors.OpExecError for failures that are somewhat dealt with in code, or
401
    expected.
402

403
    """
404
    raise NotImplementedError
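  # Sketch for illustration (HypotheticalTasklet is an invented name): an LU
  # delegates to tasklets by filling self.tasklets in ExpandNames; the default
  # LogicalUnit.CheckPrereq and Exec above then iterate over them in order:
  #
  #   def ExpandNames(self):
  #     self._ExpandAndLockInstance()
  #     self.tasklets = [HypotheticalTasklet(self)]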
405

    
406

    
407
def _GetWantedNodes(lu, nodes):
408
  """Returns list of checked and expanded node names.
409

410
  @type lu: L{LogicalUnit}
411
  @param lu: the logical unit on whose behalf we execute
412
  @type nodes: list
413
  @param nodes: non-empty list of node names to be expanded
414
  @rtype: list
415
  @return: the list of nodes, sorted
416
  @raise errors.OpPrereqError: if the nodes parameter is of the wrong type
417

418
  """
419
  if not isinstance(nodes, list):
420
    raise errors.OpPrereqError("Invalid argument type 'nodes'")
421

    
422
  if not nodes:
423
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
424
      " non-empty list of nodes whose name is to be expanded.")
425

    
426
  wanted = []
427
  for name in nodes:
428
    node = lu.cfg.ExpandNodeName(name)
429
    if node is None:
430
      raise errors.OpPrereqError("No such node name '%s'" % name)
431
    wanted.append(node)
432

    
433
  return utils.NiceSort(wanted)
434

    
435

    
436
def _GetWantedInstances(lu, instances):
437
  """Returns list of checked and expanded instance names.
438

439
  @type lu: L{LogicalUnit}
440
  @param lu: the logical unit on whose behalf we execute
441
  @type instances: list
442
  @param instances: list of instance names, or an empty list for all instances
443
  @rtype: list
444
  @return: the list of instances, sorted
445
  @raise errors.OpPrereqError: if the instances parameter is wrong type
446
  @raise errors.OpPrereqError: if any of the passed instances is not found
447

448
  """
449
  if not isinstance(instances, list):
450
    raise errors.OpPrereqError("Invalid argument type 'instances'")
451

    
452
  if instances:
453
    wanted = []
454

    
455
    for name in instances:
456
      instance = lu.cfg.ExpandInstanceName(name)
457
      if instance is None:
458
        raise errors.OpPrereqError("No such instance name '%s'" % name)
459
      wanted.append(instance)
460

    
461
  else:
462
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
463
  return wanted
464

    
465

    
466
def _CheckOutputFields(static, dynamic, selected):
467
  """Checks whether all selected fields are valid.
468

469
  @type static: L{utils.FieldSet}
470
  @param static: static fields set
471
  @type dynamic: L{utils.FieldSet}
472
  @param dynamic: dynamic fields set
473

474
  """
475
  f = utils.FieldSet()
476
  f.Extend(static)
477
  f.Extend(dynamic)
478

    
479
  delta = f.NonMatching(selected)
480
  if delta:
481
    raise errors.OpPrereqError("Unknown output fields selected: %s"
482
                               % ",".join(delta))
483

    
484

    
485
def _CheckBooleanOpField(op, name):
486
  """Validates boolean opcode parameters.
487

488
  This will ensure that an opcode parameter is either a boolean value,
489
  or None (but that it always exists).
490

491
  """
492
  val = getattr(op, name, None)
493
  if not (val is None or isinstance(val, bool)):
494
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
495
                               (name, str(val)))
496
  setattr(op, name, val)
497

    
498

    
499
def _CheckNodeOnline(lu, node):
500
  """Ensure that a given node is online.
501

502
  @param lu: the LU on behalf of which we make the check
503
  @param node: the node to check
504
  @raise errors.OpPrereqError: if the node is offline
505

506
  """
507
  if lu.cfg.GetNodeInfo(node).offline:
508
    raise errors.OpPrereqError("Can't use offline node %s" % node)
509

    
510

    
511
def _CheckNodeNotDrained(lu, node):
512
  """Ensure that a given node is not drained.
513

514
  @param lu: the LU on behalf of which we make the check
515
  @param node: the node to check
516
  @raise errors.OpPrereqError: if the node is drained
517

518
  """
519
  if lu.cfg.GetNodeInfo(node).drained:
520
    raise errors.OpPrereqError("Can't use drained node %s" % node)
521

    
522

    
523
def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
524
                          memory, vcpus, nics, disk_template, disks,
525
                          bep, hvp, hypervisor_name):
526
  """Builds instance related env variables for hooks
527

528
  This builds the hook environment from individual variables.
529

530
  @type name: string
531
  @param name: the name of the instance
532
  @type primary_node: string
533
  @param primary_node: the name of the instance's primary node
534
  @type secondary_nodes: list
535
  @param secondary_nodes: list of secondary nodes as strings
536
  @type os_type: string
537
  @param os_type: the name of the instance's OS
538
  @type status: boolean
539
  @param status: the should_run status of the instance
540
  @type memory: string
541
  @param memory: the memory size of the instance
542
  @type vcpus: string
543
  @param vcpus: the count of VCPUs the instance has
544
  @type nics: list
545
  @param nics: list of tuples (ip, mac, mode, link) representing
546
      the NICs the instance has
547
  @type disk_template: string
548
  @param disk_template: the disk template of the instance
549
  @type disks: list
550
  @param disks: the list of (size, mode) pairs
551
  @type bep: dict
552
  @param bep: the backend parameters for the instance
553
  @type hvp: dict
554
  @param hvp: the hypervisor parameters for the instance
555
  @type hypervisor_name: string
556
  @param hypervisor_name: the hypervisor for the instance
557
  @rtype: dict
558
  @return: the hook environment for this instance
559

560
  """
561
  if status:
562
    str_status = "up"
563
  else:
564
    str_status = "down"
565
  env = {
566
    "OP_TARGET": name,
567
    "INSTANCE_NAME": name,
568
    "INSTANCE_PRIMARY": primary_node,
569
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
570
    "INSTANCE_OS_TYPE": os_type,
571
    "INSTANCE_STATUS": str_status,
572
    "INSTANCE_MEMORY": memory,
573
    "INSTANCE_VCPUS": vcpus,
574
    "INSTANCE_DISK_TEMPLATE": disk_template,
575
    "INSTANCE_HYPERVISOR": hypervisor_name,
576
  }
577

    
578
  if nics:
579
    nic_count = len(nics)
580
    for idx, (ip, mac, mode, link) in enumerate(nics):
581
      if ip is None:
582
        ip = ""
583
      env["INSTANCE_NIC%d_IP" % idx] = ip
584
      env["INSTANCE_NIC%d_MAC" % idx] = mac
585
      env["INSTANCE_NIC%d_MODE" % idx] = mode
586
      env["INSTANCE_NIC%d_LINK" % idx] = link
587
      if mode == constants.NIC_MODE_BRIDGED:
588
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
589
  else:
590
    nic_count = 0
591

    
592
  env["INSTANCE_NIC_COUNT"] = nic_count
593

    
594
  if disks:
595
    disk_count = len(disks)
596
    for idx, (size, mode) in enumerate(disks):
597
      env["INSTANCE_DISK%d_SIZE" % idx] = size
598
      env["INSTANCE_DISK%d_MODE" % idx] = mode
599
  else:
600
    disk_count = 0
601

    
602
  env["INSTANCE_DISK_COUNT"] = disk_count
603

    
604
  for source, kind in [(bep, "BE"), (hvp, "HV")]:
605
    for key, value in source.items():
606
      env["INSTANCE_%s_%s" % (kind, key)] = value
607

    
608
  return env
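  # For illustration only (all values invented): for an instance with one NIC
  # and one disk, the returned environment contains entries along the lines
  # of:
  #
  #   OP_TARGET=inst1.example.com    INSTANCE_NAME=inst1.example.com
  #   INSTANCE_PRIMARY=node1.example.com   INSTANCE_STATUS=up
  #   INSTANCE_NIC_COUNT=1   INSTANCE_NIC0_MAC=aa:00:00:12:34:56
  #   INSTANCE_DISK_COUNT=1  INSTANCE_DISK0_SIZE=10240
  #
  # plus one INSTANCE_BE_*/INSTANCE_HV_* entry per backend/hypervisor
  # parameter.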
609

    
610

    
611
def _NICListToTuple(lu, nics):
612
  """Build a list of nic information tuples.
613

614
  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
615
  value in LUQueryInstanceData.
616

617
  @type lu:  L{LogicalUnit}
618
  @param lu: the logical unit on whose behalf we execute
619
  @type nics: list of L{objects.NIC}
620
  @param nics: list of nics to convert to hooks tuples
621

622
  """
623
  hooks_nics = []
624
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
625
  for nic in nics:
626
    ip = nic.ip
627
    mac = nic.mac
628
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
629
    mode = filled_params[constants.NIC_MODE]
630
    link = filled_params[constants.NIC_LINK]
631
    hooks_nics.append((ip, mac, mode, link))
632
  return hooks_nics
633

    
634

    
635
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
636
  """Builds instance related env variables for hooks from an object.
637

638
  @type lu: L{LogicalUnit}
639
  @param lu: the logical unit on whose behalf we execute
640
  @type instance: L{objects.Instance}
641
  @param instance: the instance for which we should build the
642
      environment
643
  @type override: dict
644
  @param override: dictionary with key/values that will override
645
      our values
646
  @rtype: dict
647
  @return: the hook environment dictionary
648

649
  """
650
  cluster = lu.cfg.GetClusterInfo()
651
  bep = cluster.FillBE(instance)
652
  hvp = cluster.FillHV(instance)
653
  args = {
654
    'name': instance.name,
655
    'primary_node': instance.primary_node,
656
    'secondary_nodes': instance.secondary_nodes,
657
    'os_type': instance.os,
658
    'status': instance.admin_up,
659
    'memory': bep[constants.BE_MEMORY],
660
    'vcpus': bep[constants.BE_VCPUS],
661
    'nics': _NICListToTuple(lu, instance.nics),
662
    'disk_template': instance.disk_template,
663
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
664
    'bep': bep,
665
    'hvp': hvp,
666
    'hypervisor_name': instance.hypervisor,
667
  }
668
  if override:
669
    args.update(override)
670
  return _BuildInstanceHookEnv(**args)
671

    
672

    
673
def _AdjustCandidatePool(lu):
674
  """Adjust the candidate pool after node operations.
675

676
  """
677
  mod_list = lu.cfg.MaintainCandidatePool()
678
  if mod_list:
679
    lu.LogInfo("Promoted nodes to master candidate role: %s",
680
               ", ".join(node.name for node in mod_list))
681
    for name in mod_list:
682
      lu.context.ReaddNode(name)
683
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
684
  if mc_now > mc_max:
685
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
686
               (mc_now, mc_max))
687

    
688

    
689
def _CheckNicsBridgesExist(lu, target_nics, target_node,
690
                               profile=constants.PP_DEFAULT):
691
  """Check that the brigdes needed by a list of nics exist.
692

693
  """
694
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
695
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
696
                for nic in target_nics]
697
  brlist = [params[constants.NIC_LINK] for params in paramslist
698
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
699
  if brlist:
700
    result = lu.rpc.call_bridges_exist(target_node, brlist)
701
    result.Raise("Error checking bridges on destination node '%s'" %
702
                 target_node, prereq=True)
703

    
704

    
705
def _CheckInstanceBridgesExist(lu, instance, node=None):
706
  """Check that the brigdes needed by an instance exist.
707

708
  """
709
  if node is None:
710
    node = instance.primary_node
711
  _CheckNicsBridgesExist(lu, instance.nics, node)
712

    
713

    
714
def _GetNodeInstancesInner(cfg, fn):
715
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
716

    
717

    
718
def _GetNodeInstances(cfg, node_name):
719
  """Returns a list of all primary and secondary instances on a node.
720

721
  """
722

    
723
  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
724

    
725

    
726
def _GetNodePrimaryInstances(cfg, node_name):
727
  """Returns primary instances on a node.
728

729
  """
730
  return _GetNodeInstancesInner(cfg,
731
                                lambda inst: node_name == inst.primary_node)
732

    
733

    
734
def _GetNodeSecondaryInstances(cfg, node_name):
735
  """Returns secondary instances on a node.
736

737
  """
738
  return _GetNodeInstancesInner(cfg,
739
                                lambda inst: node_name in inst.secondary_nodes)
740

    
741

    
742
def _GetStorageTypeArgs(cfg, storage_type):
743
  """Returns the arguments for a storage type.
744

745
  """
746
  # Special case for file storage
747
  if storage_type == constants.ST_FILE:
748
    # storage.FileStorage wants a list of storage directories
749
    return [[cfg.GetFileStorageDir()]]
750

    
751
  return []
752

    
753

    
754
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
755
  faulty = []
756

    
757
  for dev in instance.disks:
758
    cfg.SetDiskID(dev, node_name)
759

    
760
  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
761
  result.Raise("Failed to get disk status from node %s" % node_name,
762
               prereq=prereq)
763

    
764
  for idx, bdev_status in enumerate(result.payload):
765
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
766
      faulty.append(idx)
767

    
768
  return faulty
769

    
770

    
771
class LUPostInitCluster(LogicalUnit):
772
  """Logical unit for running hooks after cluster initialization.
773

774
  """
775
  HPATH = "cluster-init"
776
  HTYPE = constants.HTYPE_CLUSTER
777
  _OP_REQP = []
778

    
779
  def BuildHooksEnv(self):
780
    """Build hooks env.
781

782
    """
783
    env = {"OP_TARGET": self.cfg.GetClusterName()}
784
    mn = self.cfg.GetMasterNode()
785
    return env, [], [mn]
786

    
787
  def CheckPrereq(self):
788
    """No prerequisites to check.
789

790
    """
791
    return True
792

    
793
  def Exec(self, feedback_fn):
794
    """Nothing to do.
795

796
    """
797
    return True
798

    
799

    
800
class LUDestroyCluster(NoHooksLU):
801
  """Logical unit for destroying the cluster.
802

803
  """
804
  _OP_REQP = []
805

    
806
  def CheckPrereq(self):
807
    """Check prerequisites.
808

809
    This checks whether the cluster is empty.
810

811
    Any errors are signaled by raising errors.OpPrereqError.
812

813
    """
814
    master = self.cfg.GetMasterNode()
815

    
816
    nodelist = self.cfg.GetNodeList()
817
    if len(nodelist) != 1 or nodelist[0] != master:
818
      raise errors.OpPrereqError("There are still %d node(s) in"
819
                                 " this cluster." % (len(nodelist) - 1))
820
    instancelist = self.cfg.GetInstanceList()
821
    if instancelist:
822
      raise errors.OpPrereqError("There are still %d instance(s) in"
823
                                 " this cluster." % len(instancelist))
824

    
825
  def Exec(self, feedback_fn):
826
    """Destroys the cluster.
827

828
    """
829
    master = self.cfg.GetMasterNode()
830
    result = self.rpc.call_node_stop_master(master, False)
831
    result.Raise("Could not disable the master role")
832
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
833
    utils.CreateBackup(priv_key)
834
    utils.CreateBackup(pub_key)
835
    return master
836

    
837

    
838
class LUVerifyCluster(LogicalUnit):
839
  """Verifies the cluster status.
840

841
  """
842
  HPATH = "cluster-verify"
843
  HTYPE = constants.HTYPE_CLUSTER
844
  _OP_REQP = ["skip_checks"]
845
  REQ_BGL = False
846

    
847
  def ExpandNames(self):
848
    self.needed_locks = {
849
      locking.LEVEL_NODE: locking.ALL_SET,
850
      locking.LEVEL_INSTANCE: locking.ALL_SET,
851
    }
852
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
853

    
854
  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
855
                  node_result, feedback_fn, master_files,
856
                  drbd_map, vg_name):
857
    """Run multiple tests against a node.
858

859
    Test list:
860

861
      - compares ganeti version
862
      - checks vg existence and size > 20G
863
      - checks config file checksum
864
      - checks ssh to other nodes
865

866
    @type nodeinfo: L{objects.Node}
867
    @param nodeinfo: the node to check
868
    @param file_list: required list of files
869
    @param local_cksum: dictionary of local files and their checksums
870
    @param node_result: the results from the node
871
    @param feedback_fn: function used to accumulate results
872
    @param master_files: list of files that only masters should have
873
    @param drbd_map: the used DRBD minors for this node, in
874
        form of minor: (instance, must_exist) which correspond to instances
875
        and their running status
876
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())
877

878
    """
879
    node = nodeinfo.name
880

    
881
    # main result, node_result should be a non-empty dict
882
    if not node_result or not isinstance(node_result, dict):
883
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
884
      return True
885

    
886
    # compares ganeti version
887
    local_version = constants.PROTOCOL_VERSION
888
    remote_version = node_result.get('version', None)
889
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
890
            len(remote_version) == 2):
891
      feedback_fn("  - ERROR: connection to %s failed" % (node))
892
      return True
893

    
894
    if local_version != remote_version[0]:
895
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
896
                  " node %s %s" % (local_version, node, remote_version[0]))
897
      return True
898

    
899
    # node seems compatible, we can actually try to look into its results
900

    
901
    bad = False
902

    
903
    # full package version
904
    if constants.RELEASE_VERSION != remote_version[1]:
905
      feedback_fn("  - WARNING: software version mismatch: master %s,"
906
                  " node %s %s" %
907
                  (constants.RELEASE_VERSION, node, remote_version[1]))
908

    
909
    # checks vg existence and size > 20G
910
    if vg_name is not None:
911
      vglist = node_result.get(constants.NV_VGLIST, None)
912
      if not vglist:
913
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
914
                        (node,))
915
        bad = True
916
      else:
917
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
918
                                              constants.MIN_VG_SIZE)
919
        if vgstatus:
920
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
921
          bad = True
922

    
923
    # checks config file checksum
924

    
925
    remote_cksum = node_result.get(constants.NV_FILELIST, None)
926
    if not isinstance(remote_cksum, dict):
927
      bad = True
928
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
929
    else:
930
      for file_name in file_list:
931
        node_is_mc = nodeinfo.master_candidate
932
        must_have_file = file_name not in master_files
933
        if file_name not in remote_cksum:
934
          if node_is_mc or must_have_file:
935
            bad = True
936
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
937
        elif remote_cksum[file_name] != local_cksum[file_name]:
938
          if node_is_mc or must_have_file:
939
            bad = True
940
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
941
          else:
942
            # not candidate and this is not a must-have file
943
            bad = True
944
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
945
                        " candidates (and the file is outdated)" % file_name)
946
        else:
947
          # all good, except non-master/non-must have combination
948
          if not node_is_mc and not must_have_file:
949
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
950
                        " candidates" % file_name)
951

    
952
    # checks ssh to any
953

    
954
    if constants.NV_NODELIST not in node_result:
955
      bad = True
956
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
957
    else:
958
      if node_result[constants.NV_NODELIST]:
959
        bad = True
960
        for node in node_result[constants.NV_NODELIST]:
961
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
962
                          (node, node_result[constants.NV_NODELIST][node]))
963

    
964
    if constants.NV_NODENETTEST not in node_result:
965
      bad = True
966
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
967
    else:
968
      if node_result[constants.NV_NODENETTEST]:
969
        bad = True
970
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
971
        for node in nlist:
972
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
973
                          (node, node_result[constants.NV_NODENETTEST][node]))
974

    
975
    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
976
    if isinstance(hyp_result, dict):
977
      for hv_name, hv_result in hyp_result.iteritems():
978
        if hv_result is not None:
979
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
980
                      (hv_name, hv_result))
981

    
982
    # check used drbd list
983
    if vg_name is not None:
984
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
985
      if not isinstance(used_minors, (tuple, list)):
986
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
987
                    str(used_minors))
988
      else:
989
        for minor, (iname, must_exist) in drbd_map.items():
990
          if minor not in used_minors and must_exist:
991
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
992
                        " not active" % (minor, iname))
993
            bad = True
994
        for minor in used_minors:
995
          if minor not in drbd_map:
996
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
997
                        minor)
998
            bad = True
999

    
1000
    return bad
1001

    
1002
  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
1003
                      node_instance, feedback_fn, n_offline):
1004
    """Verify an instance.
1005

1006
    This function checks to see if the required block devices are
1007
    available on the instance's node.
1008

1009
    """
1010
    bad = False
1011

    
1012
    node_current = instanceconfig.primary_node
1013

    
1014
    node_vol_should = {}
1015
    instanceconfig.MapLVsByNode(node_vol_should)
1016

    
1017
    for node in node_vol_should:
1018
      if node in n_offline:
1019
        # ignore missing volumes on offline nodes
1020
        continue
1021
      for volume in node_vol_should[node]:
1022
        if node not in node_vol_is or volume not in node_vol_is[node]:
1023
          feedback_fn("  - ERROR: volume %s missing on node %s" %
1024
                          (volume, node))
1025
          bad = True
1026

    
1027
    if instanceconfig.admin_up:
1028
      if ((node_current not in node_instance or
1029
          instance not in node_instance[node_current]) and
1030
          node_current not in n_offline):
1031
        feedback_fn("  - ERROR: instance %s not running on node %s" %
1032
                        (instance, node_current))
1033
        bad = True
1034

    
1035
    for node in node_instance:
1036
      if node != node_current:
1037
        if instance in node_instance[node]:
1038
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
1039
                          (instance, node))
1040
          bad = True
1041

    
1042
    return bad
1043

    
1044
  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
1045
    """Verify if there are any unknown volumes in the cluster.
1046

1047
    The .os, .swap and backup volumes are ignored. All other volumes are
1048
    reported as unknown.
1049

1050
    """
1051
    bad = False
1052

    
1053
    for node in node_vol_is:
1054
      for volume in node_vol_is[node]:
1055
        if node not in node_vol_should or volume not in node_vol_should[node]:
1056
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
1057
                      (volume, node))
1058
          bad = True
1059
    return bad
1060

    
1061
  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
1062
    """Verify the list of running instances.
1063

1064
    This checks what instances are running but unknown to the cluster.
1065

1066
    """
1067
    bad = False
1068
    for node in node_instance:
1069
      for runninginstance in node_instance[node]:
1070
        if runninginstance not in instancelist:
1071
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
1072
                          (runninginstance, node))
1073
          bad = True
1074
    return bad
1075

    
1076
  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
1077
    """Verify N+1 Memory Resilience.
1078

1079
    Check that if one single node dies we can still start all the instances it
1080
    was primary for.
1081

1082
    """
1083
    bad = False
1084

    
1085
    for node, nodeinfo in node_info.iteritems():
1086
      # This code checks that every node which is now listed as secondary has
1087
      # enough memory to host all the instances it would need to take over,
1088
      # should a single other node in the cluster fail.
1089
      # FIXME: not ready for failover to an arbitrary node
1090
      # FIXME: does not support file-backed instances
1091
      # WARNING: we currently take into account down instances as well as up
1092
      # ones, considering that even if they're down someone might want to start
1093
      # them even in the event of a node failure.
1094
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
1095
        needed_mem = 0
1096
        for instance in instances:
1097
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1098
          if bep[constants.BE_AUTO_BALANCE]:
1099
            needed_mem += bep[constants.BE_MEMORY]
1100
        if nodeinfo['mfree'] < needed_mem:
1101
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
1102
                      " failovers should node %s fail" % (node, prinode))
1103
          bad = True
1104
    return bad
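  # Worked example of the check above (numbers invented for illustration):
  # if node N2 reports mfree=4096 MiB and is secondary for two auto-balanced
  # instances whose primary is N1 with BE_MEMORY values 2048 and 1024, then
  # needed_mem for the (N2, N1) pair is 2048 + 1024 = 3072 <= 4096 and no
  # error is reported; with mfree=2048 the "not enough memory" error above
  # would trigger.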
1105

    
1106
  def CheckPrereq(self):
1107
    """Check prerequisites.
1108

1109
    Transform the list of checks we're going to skip into a set and check that
1110
    all its members are valid.
1111

1112
    """
1113
    self.skip_set = frozenset(self.op.skip_checks)
1114
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1115
      raise errors.OpPrereqError("Invalid checks to be skipped specified")
1116

    
1117
  def BuildHooksEnv(self):
1118
    """Build hooks env.
1119

1120
    Cluster-Verify hooks run only in the post phase; when they fail, their
1121
    output is logged in the verify output and the verification fails.
1122

1123
    """
1124
    all_nodes = self.cfg.GetNodeList()
1125
    env = {
1126
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1127
      }
1128
    for node in self.cfg.GetAllNodesInfo().values():
1129
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1130

    
1131
    return env, [], all_nodes
1132

    
1133
  def Exec(self, feedback_fn):
1134
    """Verify integrity of cluster, performing various test on nodes.
1135

1136
    """
1137
    bad = False
1138
    feedback_fn("* Verifying global settings")
1139
    for msg in self.cfg.VerifyConfig():
1140
      feedback_fn("  - ERROR: %s" % msg)
1141

    
1142
    vg_name = self.cfg.GetVGName()
1143
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1144
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
1145
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1146
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1147
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1148
                        for iname in instancelist)
1149
    i_non_redundant = [] # Non redundant instances
1150
    i_non_a_balanced = [] # Non auto-balanced instances
1151
    n_offline = [] # List of offline nodes
1152
    n_drained = [] # List of nodes being drained
1153
    node_volume = {}
1154
    node_instance = {}
1155
    node_info = {}
1156
    instance_cfg = {}
1157

    
1158
    # FIXME: verify OS list
1159
    # do local checksums
1160
    master_files = [constants.CLUSTER_CONF_FILE]
1161

    
1162
    file_names = ssconf.SimpleStore().GetFileList()
1163
    file_names.append(constants.SSL_CERT_FILE)
1164
    file_names.append(constants.RAPI_CERT_FILE)
1165
    file_names.extend(master_files)
1166

    
1167
    local_checksums = utils.FingerprintFiles(file_names)
1168

    
1169
    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1170
    node_verify_param = {
1171
      constants.NV_FILELIST: file_names,
1172
      constants.NV_NODELIST: [node.name for node in nodeinfo
1173
                              if not node.offline],
1174
      constants.NV_HYPERVISOR: hypervisors,
1175
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1176
                                  node.secondary_ip) for node in nodeinfo
1177
                                 if not node.offline],
1178
      constants.NV_INSTANCELIST: hypervisors,
1179
      constants.NV_VERSION: None,
1180
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1181
      }
1182
    if vg_name is not None:
1183
      node_verify_param[constants.NV_VGLIST] = None
1184
      node_verify_param[constants.NV_LVLIST] = vg_name
1185
      node_verify_param[constants.NV_DRBDLIST] = None
1186
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1187
                                           self.cfg.GetClusterName())
1188

    
1189
    cluster = self.cfg.GetClusterInfo()
1190
    master_node = self.cfg.GetMasterNode()
1191
    all_drbd_map = self.cfg.ComputeDRBDMap()
1192

    
1193
    for node_i in nodeinfo:
1194
      node = node_i.name
1195

    
1196
      if node_i.offline:
1197
        feedback_fn("* Skipping offline node %s" % (node,))
1198
        n_offline.append(node)
1199
        continue
1200

    
1201
      if node == master_node:
1202
        ntype = "master"
1203
      elif node_i.master_candidate:
1204
        ntype = "master candidate"
1205
      elif node_i.drained:
1206
        ntype = "drained"
1207
        n_drained.append(node)
1208
      else:
1209
        ntype = "regular"
1210
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1211

    
1212
      msg = all_nvinfo[node].fail_msg
1213
      if msg:
1214
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
1215
        bad = True
1216
        continue
1217

    
1218
      nresult = all_nvinfo[node].payload
1219
      node_drbd = {}
1220
      for minor, instance in all_drbd_map[node].items():
1221
        if instance not in instanceinfo:
1222
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
1223
                      instance)
1224
          # ghost instance should not be running, but otherwise we
1225
          # don't give double warnings (both ghost instance and
1226
          # unallocated minor in use)
1227
          node_drbd[minor] = (instance, False)
1228
        else:
1229
          instance = instanceinfo[instance]
1230
          node_drbd[minor] = (instance.name, instance.admin_up)
1231
      result = self._VerifyNode(node_i, file_names, local_checksums,
1232
                                nresult, feedback_fn, master_files,
1233
                                node_drbd, vg_name)
1234
      bad = bad or result
1235

    
1236
      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1237
      if vg_name is None:
1238
        node_volume[node] = {}
1239
      elif isinstance(lvdata, basestring):
1240
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
1241
                    (node, utils.SafeEncode(lvdata)))
1242
        bad = True
1243
        node_volume[node] = {}
1244
      elif not isinstance(lvdata, dict):
1245
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
1246
        bad = True
1247
        continue
1248
      else:
1249
        node_volume[node] = lvdata
1250

    
1251
      # node_instance
1252
      idata = nresult.get(constants.NV_INSTANCELIST, None)
1253
      if not isinstance(idata, list):
1254
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
1255
                    (node,))
1256
        bad = True
1257
        continue
1258

    
1259
      node_instance[node] = idata
1260

    
1261
      # node_info
1262
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
1263
      if not isinstance(nodeinfo, dict):
1264
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
1265
        bad = True
1266
        continue
1267

    
1268
      try:
1269
        node_info[node] = {
1270
          "mfree": int(nodeinfo['memory_free']),
1271
          "pinst": [],
1272
          "sinst": [],
1273
          # dictionary holding all instances this node is secondary for,
1274
          # grouped by their primary node. Each key is a cluster node, and each
1275
          # value is a list of instances which have the key as primary and the
1276
          # current node as secondary.  this is handy to calculate N+1 memory
1277
          # availability if you can only failover from a primary to its
1278
          # secondary.
1279
          "sinst-by-pnode": {},
1280
        }
1281
        # FIXME: devise a free space model for file based instances as well
1282
        if vg_name is not None:
1283
          if (constants.NV_VGLIST not in nresult or
1284
              vg_name not in nresult[constants.NV_VGLIST]):
1285
            feedback_fn("  - ERROR: node %s didn't return data for the"
1286
                        " volume group '%s' - it is either missing or broken" %
1287
                        (node, vg_name))
1288
            bad = True
1289
            continue
1290
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
1291
      except (ValueError, KeyError):
1292
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
1293
                    " from node %s" % (node,))
1294
        bad = True
1295
        continue
1296

    
1297
    node_vol_should = {}
1298

    
1299
    for instance in instancelist:
1300
      feedback_fn("* Verifying instance %s" % instance)
1301
      inst_config = instanceinfo[instance]
1302
      result = self._VerifyInstance(instance, inst_config, node_volume,
1303
                                     node_instance, feedback_fn, n_offline)
1304
      bad = bad or result
1305
      inst_nodes_offline = []
1306

    
1307
      inst_config.MapLVsByNode(node_vol_should)
1308

    
1309
      instance_cfg[instance] = inst_config
1310

    
1311
      pnode = inst_config.primary_node
1312
      if pnode in node_info:
1313
        node_info[pnode]['pinst'].append(instance)
1314
      elif pnode not in n_offline:
1315
        feedback_fn("  - ERROR: instance %s, connection to primary node"
1316
                    " %s failed" % (instance, pnode))
1317
        bad = True
1318

    
1319
      if pnode in n_offline:
1320
        inst_nodes_offline.append(pnode)
1321

    
1322
      # If the instance is non-redundant we cannot survive losing its primary
1323
      # node, so we are not N+1 compliant. On the other hand we have no disk
1324
      # templates with more than one secondary so that situation is not well
1325
      # supported either.
1326
      # FIXME: does not support file-backed instances
1327
      if len(inst_config.secondary_nodes) == 0:
1328
        i_non_redundant.append(instance)
1329
      elif len(inst_config.secondary_nodes) > 1:
1330
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
1331
                    % instance)
1332

    
1333
      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1334
        i_non_a_balanced.append(instance)
1335

    
1336
      for snode in inst_config.secondary_nodes:
1337
        if snode in node_info:
1338
          node_info[snode]['sinst'].append(instance)
1339
          if pnode not in node_info[snode]['sinst-by-pnode']:
1340
            node_info[snode]['sinst-by-pnode'][pnode] = []
1341
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
1342
        elif snode not in n_offline:
1343
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
1344
                      " %s failed" % (instance, snode))
1345
          bad = True
1346
        if snode in n_offline:
1347
          inst_nodes_offline.append(snode)
1348

    
1349
      if inst_nodes_offline:
1350
        # warn that the instance lives on offline nodes, and set bad=True
1351
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
1352
                    ", ".join(inst_nodes_offline))
1353
        bad = True
1354

    
1355
    feedback_fn("* Verifying orphan volumes")
1356
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
1357
                                       feedback_fn)
1358
    bad = bad or result
1359

    
1360
    feedback_fn("* Verifying remaining instances")
1361
    result = self._VerifyOrphanInstances(instancelist, node_instance,
1362
                                         feedback_fn)
1363
    bad = bad or result
1364

    
1365
    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1366
      feedback_fn("* Verifying N+1 Memory redundancy")
1367
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
1368
      bad = bad or result
1369

    
1370
    feedback_fn("* Other Notes")
1371
    if i_non_redundant:
1372
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1373
                  % len(i_non_redundant))
1374

    
1375
    if i_non_a_balanced:
1376
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1377
                  % len(i_non_a_balanced))
1378

    
1379
    if n_offline:
1380
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))
1381

    
1382
    if n_drained:
1383
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))
1384

    
1385
    return not bad
1386

    
1387
  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1388
    """Analyze the post-hooks' result
1389

1390
    This method analyses the hook result, handles it, and sends some
1391
    nicely-formatted feedback back to the user.
1392

1393
    @param phase: one of L{constants.HOOKS_PHASE_POST} or
1394
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1395
    @param hooks_results: the results of the multi-node hooks rpc call
1396
    @param feedback_fn: function used to send feedback back to the caller
1397
    @param lu_result: previous Exec result
1398
    @return: the new Exec result, based on the previous result
1399
        and hook results
1400

1401
    """
1402
    # We only really run POST phase hooks, and are only interested in
1403
    # their results
1404
    if phase == constants.HOOKS_PHASE_POST:
1405
      # Used to change hooks' output to proper indentation
1406
      indent_re = re.compile('^', re.M)
1407
      feedback_fn("* Hooks Results")
1408
      if not hooks_results:
1409
        feedback_fn("  - ERROR: general communication failure")
1410
        lu_result = 1
1411
      else:
1412
        for node_name in hooks_results:
1413
          show_node_header = True
1414
          res = hooks_results[node_name]
1415
          msg = res.fail_msg
1416
          if msg:
1417
            if res.offline:
1418
              # no need to warn or set fail return value
1419
              continue
1420
            feedback_fn("    Communication failure in hooks execution: %s" %
1421
                        msg)
1422
            lu_result = 1
1423
            continue
1424
          for script, hkr, output in res.payload:
1425
            if hkr == constants.HKR_FAIL:
1426
              # The node header is only shown once, if there are
1427
              # failing hooks on that node
1428
              if show_node_header:
1429
                feedback_fn("  Node %s:" % node_name)
1430
                show_node_header = False
1431
              feedback_fn("    ERROR: Script %s failed, output:" % script)
1432
              output = indent_re.sub('      ', output)
1433
              feedback_fn("%s" % output)
1434
              lu_result = 1
1435

    
1436
      return lu_result
1437

    
1438

    
1439
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result
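
  # Illustrative sketch (assumption, not from the original file): the tuple
  # returned above could look like
  #   ({"node3.example.com": "error message from the LV listing RPC"},
  #    ["instance1.example.com"],
  #    {"instance2.example.com": [("node4.example.com", "some-lv-name")]})
  # i.e. per-node errors, instances needing activate-disks, and missing
  # volumes keyed by instance name; all names shown here are hypothetical.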


class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
      if result.failed:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # the node reports sizes in bytes, while the configuration stores MiB
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance)
          changed.append((instance.name, idx, size))
    return changed
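
  # Illustrative note (assumption, not in the original file): each entry in
  # the returned 'changed' list is an (instance name, disk index, new size in
  # MiB) tuple, e.g.
  #   [("instance1.example.com", 0, 10240)]
  # for a first disk corrected to 10 GiB; the instance name is hypothetical.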


class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV
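
# Illustrative note (assumption, not part of the upstream file): a DRBD disk
# whose children are LD_LV devices makes the check above return True through
# the recursion, while a disk tree without any LD_LV device returns False,
# e.g. (hypothetical construction)
#   drbd = objects.Disk(dev_type=constants.LD_DRBD8,
#                       children=[objects.Disk(dev_type=constants.LD_LV)])
#   _RecursiveCheckIfLVMBased(drbd)  # would be True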


class LUSetClusterParams(LogicalUnit):
1715
  """Change the parameters of the cluster.
1716

1717
  """
1718
  HPATH = "cluster-modify"
1719
  HTYPE = constants.HTYPE_CLUSTER
1720
  _OP_REQP = []
1721
  REQ_BGL = False
1722

    
1723
  def CheckArguments(self):
1724
    """Check parameters
1725

1726
    """
1727
    if not hasattr(self.op, "candidate_pool_size"):
1728
      self.op.candidate_pool_size = None
1729
    if self.op.candidate_pool_size is not None:
1730
      try:
1731
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1732
      except (ValueError, TypeError), err:
1733
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1734
                                   str(err))
1735
      if self.op.candidate_pool_size < 1:
1736
        raise errors.OpPrereqError("At least one master candidate needed")
1737

    
1738
  def ExpandNames(self):
1739
    # FIXME: in the future maybe other cluster params won't require checking on
1740
    # all nodes to be modified.
1741
    self.needed_locks = {
1742
      locking.LEVEL_NODE: locking.ALL_SET,
1743
    }
1744
    self.share_locks[locking.LEVEL_NODE] = 1
1745

    
1746
  def BuildHooksEnv(self):
1747
    """Build hooks env.
1748

1749
    """
1750
    env = {
1751
      "OP_TARGET": self.cfg.GetClusterName(),
1752
      "NEW_VG_NAME": self.op.vg_name,
1753
      }
1754
    mn = self.cfg.GetMasterNode()
1755
    return env, [mn], [mn]
1756

    
1757
  def CheckPrereq(self):
1758
    """Check prerequisites.
1759

1760
    This checks whether the given params don't conflict and
1761
    if the given volume group is valid.
1762

1763
    """
1764
    if self.op.vg_name is not None and not self.op.vg_name:
1765
      instances = self.cfg.GetAllInstancesInfo().values()
1766
      for inst in instances:
1767
        for disk in inst.disks:
1768
          if _RecursiveCheckIfLVMBased(disk):
1769
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1770
                                       " lvm-based instances exist")
1771

    
1772
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1773

    
1774
    # if vg_name not None, checks given volume group on all nodes
1775
    if self.op.vg_name:
1776
      vglist = self.rpc.call_vg_list(node_list)
1777
      for node in node_list:
1778
        msg = vglist[node].fail_msg
1779
        if msg:
1780
          # ignoring down node
1781
          self.LogWarning("Error while gathering data on node %s"
1782
                          " (ignoring node): %s", node, msg)
1783
          continue
1784
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1785
                                              self.op.vg_name,
1786
                                              constants.MIN_VG_SIZE)
1787
        if vgstatus:
1788
          raise errors.OpPrereqError("Error on node '%s': %s" %
1789
                                     (node, vgstatus))
1790

    
1791
    self.cluster = cluster = self.cfg.GetClusterInfo()
1792
    # validate params changes
1793
    if self.op.beparams:
1794
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1795
      self.new_beparams = objects.FillDict(
1796
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1797

    
1798
    if self.op.nicparams:
1799
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1800
      self.new_nicparams = objects.FillDict(
1801
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1802
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
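
    # Illustrative note (assumption, not in the original file): FillDict
    # returns a copy of the cluster-wide defaults updated with the submitted
    # overrides, e.g. (hypothetical values)
    #   objects.FillDict({"memory": 128, "vcpus": 1}, {"memory": 256})
    #   -> {"memory": 256, "vcpus": 1}
    # so parameters not mentioned in the opcode keep their current defaults.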
1803

    
1804
    # hypervisor list/parameters
1805
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1806
    if self.op.hvparams:
1807
      if not isinstance(self.op.hvparams, dict):
1808
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1809
      for hv_name, hv_dict in self.op.hvparams.items():
1810
        if hv_name not in self.new_hvparams:
1811
          self.new_hvparams[hv_name] = hv_dict
1812
        else:
1813
          self.new_hvparams[hv_name].update(hv_dict)
1814

    
1815
    if self.op.enabled_hypervisors is not None:
1816
      self.hv_list = self.op.enabled_hypervisors
1817
      if not self.hv_list:
1818
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1819
                                   " least one member")
1820
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1821
      if invalid_hvs:
1822
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1823
                                   " entries: %s" % invalid_hvs)
1824
    else:
1825
      self.hv_list = cluster.enabled_hypervisors
1826

    
1827
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1828
      # either the enabled list has changed, or the parameters have, validate
1829
      for hv_name, hv_params in self.new_hvparams.items():
1830
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1831
            (self.op.enabled_hypervisors and
1832
             hv_name in self.op.enabled_hypervisors)):
1833
          # either this is a new hypervisor, or its parameters have changed
1834
          hv_class = hypervisor.GetHypervisor(hv_name)
1835
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1836
          hv_class.CheckParameterSyntax(hv_params)
1837
          _CheckHVParams(self, node_list, hv_name, hv_params)
1838

    
1839
  def Exec(self, feedback_fn):
1840
    """Change the parameters of the cluster.
1841

1842
    """
1843
    if self.op.vg_name is not None:
1844
      new_volume = self.op.vg_name
1845
      if not new_volume:
1846
        new_volume = None
1847
      if new_volume != self.cfg.GetVGName():
1848
        self.cfg.SetVGName(new_volume)
1849
      else:
1850
        feedback_fn("Cluster LVM configuration already in desired"
1851
                    " state, not changing")
1852
    if self.op.hvparams:
1853
      self.cluster.hvparams = self.new_hvparams
1854
    if self.op.enabled_hypervisors is not None:
1855
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1856
    if self.op.beparams:
1857
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1858
    if self.op.nicparams:
1859
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1860

    
1861
    if self.op.candidate_pool_size is not None:
1862
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1863
      # we need to update the pool size here, otherwise the save will fail
1864
      _AdjustCandidatePool(self)
1865

    
1866
    self.cfg.Update(self.cluster)
1867

    
1868

    
1869
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1870
  """Distribute additional files which are part of the cluster configuration.
1871

1872
  ConfigWriter takes care of distributing the config and ssconf files, but
1873
  there are more files which should be distributed to all nodes. This function
1874
  makes sure those are copied.
1875

1876
  @param lu: calling logical unit
1877
  @param additional_nodes: list of nodes not in the config to distribute to
1878

1879
  """
1880
  # 1. Gather target nodes
1881
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1882
  dist_nodes = lu.cfg.GetNodeList()
1883
  if additional_nodes is not None:
1884
    dist_nodes.extend(additional_nodes)
1885
  if myself.name in dist_nodes:
1886
    dist_nodes.remove(myself.name)
1887
  # 2. Gather files to distribute
1888
  dist_files = set([constants.ETC_HOSTS,
1889
                    constants.SSH_KNOWN_HOSTS_FILE,
1890
                    constants.RAPI_CERT_FILE,
1891
                    constants.RAPI_USERS_FILE,
1892
                    constants.HMAC_CLUSTER_KEY,
1893
                   ])
1894

    
1895
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1896
  for hv_name in enabled_hypervisors:
1897
    hv_class = hypervisor.GetHypervisor(hv_name)
1898
    dist_files.update(hv_class.GetAncillaryFiles())
1899

    
1900
  # 3. Perform the files upload
1901
  for fname in dist_files:
1902
    if os.path.exists(fname):
1903
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1904
      for to_node, to_result in result.items():
1905
        msg = to_result.fail_msg
1906
        if msg:
1907
          msg = ("Copy of file %s to node %s failed: %s" %
1908
                 (fname, to_node, msg))
1909
          lu.proc.LogWarning(msg)
1910

    
1911

    
1912
class LURedistributeConfig(NoHooksLU):
1913
  """Force the redistribution of cluster configuration.
1914

1915
  This is a very simple LU.
1916

1917
  """
1918
  _OP_REQP = []
1919
  REQ_BGL = False
1920

    
1921
  def ExpandNames(self):
1922
    self.needed_locks = {
1923
      locking.LEVEL_NODE: locking.ALL_SET,
1924
    }
1925
    self.share_locks[locking.LEVEL_NODE] = 1
1926

    
1927
  def CheckPrereq(self):
1928
    """Check prerequisites.
1929

1930
    """
1931

    
1932
  def Exec(self, feedback_fn):
1933
    """Redistribute the configuration.
1934

1935
    """
1936
    self.cfg.Update(self.cfg.GetClusterInfo())
1937
    _RedistributeAncillaryFiles(self)
1938

    
1939

    
1940
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1941
  """Sleep and poll for an instance's disk to sync.
1942

1943
  """
1944
  if not instance.disks:
1945
    return True
1946

    
1947
  if not oneshot:
1948
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1949

    
1950
  node = instance.primary_node
1951

    
1952
  for dev in instance.disks:
1953
    lu.cfg.SetDiskID(dev, node)
1954

    
1955
  retries = 0
1956
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1957
  while True:
1958
    max_time = 0
1959
    done = True
1960
    cumul_degraded = False
1961
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1962
    msg = rstats.fail_msg
1963
    if msg:
1964
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1965
      retries += 1
1966
      if retries >= 10:
1967
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1968
                                 " aborting." % node)
1969
      time.sleep(6)
1970
      continue
1971
    rstats = rstats.payload
1972
    retries = 0
1973
    for i, mstat in enumerate(rstats):
1974
      if mstat is None:
1975
        lu.LogWarning("Can't compute data for node %s/%s",
1976
                           node, instance.disks[i].iv_name)
1977
        continue
1978

    
1979
      cumul_degraded = (cumul_degraded or
1980
                        (mstat.is_degraded and mstat.sync_percent is None))
1981
      if mstat.sync_percent is not None:
1982
        done = False
1983
        if mstat.estimated_time is not None:
1984
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
1985
          max_time = mstat.estimated_time
1986
        else:
1987
          rem_time = "no time estimate"
1988
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
1989
                        (instance.disks[i].iv_name, mstat.sync_percent, rem_time))
1990

    
1991
    # if we're done but degraded, let's do a few small retries, to
1992
    # make sure we see a stable and not transient situation; therefore
1993
    # we force restart of the loop
1994
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
1995
      logging.info("Degraded disks found, %d retries left", degr_retries)
1996
      degr_retries -= 1
1997
      time.sleep(1)
1998
      continue
1999

    
2000
    if done or oneshot:
2001
      break
2002

    
2003
    time.sleep(min(60, max_time))
2004

    
2005
  if done:
2006
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2007
  return not cumul_degraded
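
# Illustrative note (assumption, not part of the upstream file): each mstat
# entry handled above is a per-disk mirror status object providing at least
# sync_percent, estimated_time and is_degraded; e.g. a disk at 42.5% resync
# with 300 seconds left would be reported as
#   "- device disk/0: 42.50% done, 300 estimated seconds remaining"
# where the device name shown is hypothetical.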
2008

    
2009

    
2010
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2011
  """Check that mirrors are not degraded.
2012

2013
  The ldisk parameter, if True, will change the test from the
2014
  is_degraded attribute (which represents overall non-ok status for
2015
  the device(s)) to the ldisk (representing the local storage status).
2016

2017
  """
2018
  lu.cfg.SetDiskID(dev, node)
2019

    
2020
  result = True
2021

    
2022
  if on_primary or dev.AssembleOnSecondary():
2023
    rstats = lu.rpc.call_blockdev_find(node, dev)
2024
    msg = rstats.fail_msg
2025
    if msg:
2026
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2027
      result = False
2028
    elif not rstats.payload:
2029
      lu.LogWarning("Can't find disk on node %s", node)
2030
      result = False
2031
    else:
2032
      if ldisk:
2033
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2034
      else:
2035
        result = result and not rstats.payload.is_degraded
2036

    
2037
  if dev.children:
2038
    for child in dev.children:
2039
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2040

    
2041
  return result
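
# Illustrative note (assumption, not in the original file): callers that only
# care about the local storage state can pass ldisk=True, e.g. (hypothetical
# call)
#   _CheckDiskConsistency(self, dev, node_name, on_primary=False, ldisk=True)
# which tests payload.ldisk_status against constants.LDS_OKAY instead of the
# overall is_degraded flag.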
2042

    
2043

    
2044
class LUDiagnoseOS(NoHooksLU):
2045
  """Logical unit for OS diagnose/query.
2046

2047
  """
2048
  _OP_REQP = ["output_fields", "names"]
2049
  REQ_BGL = False
2050
  _FIELDS_STATIC = utils.FieldSet()
2051
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
2052

    
2053
  def ExpandNames(self):
2054
    if self.op.names:
2055
      raise errors.OpPrereqError("Selective OS query not supported")
2056

    
2057
    _CheckOutputFields(static=self._FIELDS_STATIC,
2058
                       dynamic=self._FIELDS_DYNAMIC,
2059
                       selected=self.op.output_fields)
2060

    
2061
    # Lock all nodes, in shared mode
2062
    # Temporary removal of locks, should be reverted later
2063
    # TODO: reintroduce locks when they are lighter-weight
2064
    self.needed_locks = {}
2065
    #self.share_locks[locking.LEVEL_NODE] = 1
2066
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2067

    
2068
  def CheckPrereq(self):
2069
    """Check prerequisites.
2070

2071
    """
2072

    
2073
  @staticmethod
2074
  def _DiagnoseByOS(node_list, rlist):
2075
    """Remaps a per-node return list into an a per-os per-node dictionary
2076

2077
    @param node_list: a list with the names of all nodes
2078
    @param rlist: a map with node names as keys and OS objects as values
2079

2080
    @rtype: dict
2081
    @return: a dictionary with osnames as keys and as value another map, with
2082
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2083

2084
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2085
                                     (/srv/..., False, "invalid api")],
2086
                           "node2": [(/srv/..., True, "")]}
2087
          }
2088

2089
    """
2090
    all_os = {}
2091
    # we build here the list of nodes that didn't fail the RPC (at RPC
2092
    # level), so that nodes with a non-responding node daemon don't
2093
    # make all OSes invalid
2094
    good_nodes = [node_name for node_name in rlist
2095
                  if not rlist[node_name].fail_msg]
2096
    for node_name, nr in rlist.items():
2097
      if nr.fail_msg or not nr.payload:
2098
        continue
2099
      for name, path, status, diagnose in nr.payload:
2100
        if name not in all_os:
2101
          # build a list of nodes for this os containing empty lists
2102
          # for each node in node_list
2103
          all_os[name] = {}
2104
          for nname in good_nodes:
2105
            all_os[name][nname] = []
2106
        all_os[name][node_name].append((path, status, diagnose))
2107
    return all_os
2108

    
2109
  def Exec(self, feedback_fn):
2110
    """Compute the list of OSes.
2111

2112
    """
2113
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2114
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2115
    pol = self._DiagnoseByOS(valid_nodes, node_data)
2116
    output = []
2117
    for os_name, os_data in pol.items():
2118
      row = []
2119
      for field in self.op.output_fields:
2120
        if field == "name":
2121
          val = os_name
2122
        elif field == "valid":
2123
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
2124
        elif field == "node_status":
2125
          # this is just a copy of the dict
2126
          val = {}
2127
          for node_name, nos_list in os_data.items():
2128
            val[node_name] = nos_list
2129
        else:
2130
          raise errors.ParameterError(field)
2131
        row.append(val)
2132
      output.append(row)
2133

    
2134
    return output
2135

    
2136

    
2137
class LURemoveNode(LogicalUnit):
2138
  """Logical unit for removing a node.
2139

2140
  """
2141
  HPATH = "node-remove"
2142
  HTYPE = constants.HTYPE_NODE
2143
  _OP_REQP = ["node_name"]
2144

    
2145
  def BuildHooksEnv(self):
2146
    """Build hooks env.
2147

2148
    This doesn't run on the target node in the pre phase as a failed
2149
    node would then be impossible to remove.
2150

2151
    """
2152
    env = {
2153
      "OP_TARGET": self.op.node_name,
2154
      "NODE_NAME": self.op.node_name,
2155
      }
2156
    all_nodes = self.cfg.GetNodeList()
2157
    all_nodes.remove(self.op.node_name)
2158
    return env, all_nodes, all_nodes
2159

    
2160
  def CheckPrereq(self):
2161
    """Check prerequisites.
2162

2163
    This checks:
2164
     - the node exists in the configuration
2165
     - it does not have primary or secondary instances
2166
     - it's not the master
2167

2168
    Any errors are signaled by raising errors.OpPrereqError.
2169

2170
    """
2171
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2174

    
2175
    instance_list = self.cfg.GetInstanceList()
2176

    
2177
    masternode = self.cfg.GetMasterNode()
2178
    if node.name == masternode:
2179
      raise errors.OpPrereqError("Node is the master node,"
2180
                                 " you need to failover first.")
2181

    
2182
    for instance_name in instance_list:
2183
      instance = self.cfg.GetInstanceInfo(instance_name)
2184
      if node.name in instance.all_nodes:
2185
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2186
                                   " please remove first." % instance_name)
2187
    self.op.node_name = node.name
2188
    self.node = node
2189

    
2190
  def Exec(self, feedback_fn):
2191
    """Removes the node from the cluster.
2192

2193
    """
2194
    node = self.node
2195
    logging.info("Stopping the node daemon and removing configs from node %s",
2196
                 node.name)
2197

    
2198
    self.context.RemoveNode(node.name)
2199

    
2200
    result = self.rpc.call_node_leave_cluster(node.name)
2201
    msg = result.fail_msg
2202
    if msg:
2203
      self.LogWarning("Errors encountered on the remote node while leaving"
2204
                      " the cluster: %s", msg)
2205

    
2206
    # Promote nodes to master candidate as needed
2207
    _AdjustCandidatePool(self)
2208

    
2209

    
2210
class LUQueryNodes(NoHooksLU):
2211
  """Logical unit for querying nodes.
2212

2213
  """
2214
  _OP_REQP = ["output_fields", "names", "use_locking"]
2215
  REQ_BGL = False
2216
  _FIELDS_DYNAMIC = utils.FieldSet(
2217
    "dtotal", "dfree",
2218
    "mtotal", "mnode", "mfree",
2219
    "bootid",
2220
    "ctotal", "cnodes", "csockets",
2221
    )
2222

    
2223
  _FIELDS_STATIC = utils.FieldSet(
2224
    "name", "pinst_cnt", "sinst_cnt",
2225
    "pinst_list", "sinst_list",
2226
    "pip", "sip", "tags",
2227
    "serial_no", "ctime", "mtime",
2228
    "master_candidate",
2229
    "master",
2230
    "offline",
2231
    "drained",
2232
    "role",
2233
    )
2234

    
2235
  def ExpandNames(self):
2236
    _CheckOutputFields(static=self._FIELDS_STATIC,
2237
                       dynamic=self._FIELDS_DYNAMIC,
2238
                       selected=self.op.output_fields)
2239

    
2240
    self.needed_locks = {}
2241
    self.share_locks[locking.LEVEL_NODE] = 1
2242

    
2243
    if self.op.names:
2244
      self.wanted = _GetWantedNodes(self, self.op.names)
2245
    else:
2246
      self.wanted = locking.ALL_SET
2247

    
2248
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2249
    self.do_locking = self.do_node_query and self.op.use_locking
2250
    if self.do_locking:
2251
      # if we don't request only static fields, we need to lock the nodes
2252
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2253

    
2254

    
2255
  def CheckPrereq(self):
2256
    """Check prerequisites.
2257

2258
    """
2259
    # The validation of the node list is done in the _GetWantedNodes,
2260
    # if non empty, and if empty, there's no validation to do
2261
    pass
2262

    
2263
  def Exec(self, feedback_fn):
2264
    """Computes the list of nodes and their attributes.
2265

2266
    """
2267
    all_info = self.cfg.GetAllNodesInfo()
2268
    if self.do_locking:
2269
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2270
    elif self.wanted != locking.ALL_SET:
2271
      nodenames = self.wanted
2272
      missing = set(nodenames).difference(all_info.keys())
2273
      if missing:
2274
        raise errors.OpExecError(
2275
          "Some nodes were removed before retrieving their data: %s" % missing)
2276
    else:
2277
      nodenames = all_info.keys()
2278

    
2279
    nodenames = utils.NiceSort(nodenames)
2280
    nodelist = [all_info[name] for name in nodenames]
2281

    
2282
    # begin data gathering
2283

    
2284
    if self.do_node_query:
2285
      live_data = {}
2286
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2287
                                          self.cfg.GetHypervisorType())
2288
      for name in nodenames:
2289
        nodeinfo = node_data[name]
2290
        if not nodeinfo.fail_msg and nodeinfo.payload:
2291
          nodeinfo = nodeinfo.payload
2292
          fn = utils.TryConvert
2293
          live_data[name] = {
2294
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2295
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2296
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2297
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2298
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2299
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2300
            "bootid": nodeinfo.get('bootid', None),
2301
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2302
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2303
            }
2304
        else:
2305
          live_data[name] = {}
2306
    else:
2307
      live_data = dict.fromkeys(nodenames, {})
2308

    
2309
    node_to_primary = dict([(name, set()) for name in nodenames])
2310
    node_to_secondary = dict([(name, set()) for name in nodenames])
2311

    
2312
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2313
                             "sinst_cnt", "sinst_list"))
2314
    if inst_fields & frozenset(self.op.output_fields):
2315
      instancelist = self.cfg.GetInstanceList()
2316

    
2317
      for instance_name in instancelist:
2318
        inst = self.cfg.GetInstanceInfo(instance_name)
2319
        if inst.primary_node in node_to_primary:
2320
          node_to_primary[inst.primary_node].add(inst.name)
2321
        for secnode in inst.secondary_nodes:
2322
          if secnode in node_to_secondary:
2323
            node_to_secondary[secnode].add(inst.name)
2324

    
2325
    master_node = self.cfg.GetMasterNode()
2326

    
2327
    # end data gathering
2328

    
2329
    output = []
2330
    for node in nodelist:
2331
      node_output = []
2332
      for field in self.op.output_fields:
2333
        if field == "name":
2334
          val = node.name
2335
        elif field == "pinst_list":
2336
          val = list(node_to_primary[node.name])
2337
        elif field == "sinst_list":
2338
          val = list(node_to_secondary[node.name])
2339
        elif field == "pinst_cnt":
2340
          val = len(node_to_primary[node.name])
2341
        elif field == "sinst_cnt":
2342
          val = len(node_to_secondary[node.name])
2343
        elif field == "pip":
2344
          val = node.primary_ip
2345
        elif field == "sip":
2346
          val = node.secondary_ip
2347
        elif field == "tags":
2348
          val = list(node.GetTags())
2349
        elif field == "serial_no":
2350
          val = node.serial_no
2351
        elif field == "ctime":
2352
          val = node.ctime
2353
        elif field == "mtime":
2354
          val = node.mtime
2355
        elif field == "master_candidate":
2356
          val = node.master_candidate
2357
        elif field == "master":
2358
          val = node.name == master_node
2359
        elif field == "offline":
2360
          val = node.offline
2361
        elif field == "drained":
2362
          val = node.drained
2363
        elif self._FIELDS_DYNAMIC.Matches(field):
2364
          val = live_data[node.name].get(field, None)
2365
        elif field == "role":
2366
          if node.name == master_node:
2367
            val = "M"
2368
          elif node.master_candidate:
2369
            val = "C"
2370
          elif node.drained:
2371
            val = "D"
2372
          elif node.offline:
2373
            val = "O"
2374
          else:
2375
            val = "R"
2376
        else:
2377
          raise errors.ParameterError(field)
2378
        node_output.append(val)
2379
      output.append(node_output)
2380

    
2381
    return output
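
  # Illustrative note (assumption, not part of the upstream file): the result
  # is one row per node with values in the order of self.op.output_fields,
  # e.g. for output_fields == ["name", "role", "mfree"] a row could be
  #   ["node1.example.com", "M", 2048]
  # where the node name and memory value are hypothetical.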
2382

    
2383

    
2384
class LUQueryNodeVolumes(NoHooksLU):
2385
  """Logical unit for getting volumes on node(s).
2386

2387
  """
2388
  _OP_REQP = ["nodes", "output_fields"]
2389
  REQ_BGL = False
2390
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2391
  _FIELDS_STATIC = utils.FieldSet("node")
2392

    
2393
  def ExpandNames(self):
2394
    _CheckOutputFields(static=self._FIELDS_STATIC,
2395
                       dynamic=self._FIELDS_DYNAMIC,
2396
                       selected=self.op.output_fields)
2397

    
2398
    self.needed_locks = {}
2399
    self.share_locks[locking.LEVEL_NODE] = 1
2400
    if not self.op.nodes:
2401
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2402
    else:
2403
      self.needed_locks[locking.LEVEL_NODE] = \
2404
        _GetWantedNodes(self, self.op.nodes)
2405

    
2406
  def CheckPrereq(self):
2407
    """Check prerequisites.
2408

2409
    This checks that the fields required are valid output fields.
2410

2411
    """
2412
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2413

    
2414
  def Exec(self, feedback_fn):
2415
    """Computes the list of nodes and their attributes.
2416

2417
    """
2418
    nodenames = self.nodes
2419
    volumes = self.rpc.call_node_volumes(nodenames)
2420

    
2421
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2422
             in self.cfg.GetInstanceList()]
2423

    
2424
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
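
    # Illustrative note (assumption, not in the original file): MapLVsByNode
    # is expected to yield a mapping of node name to the volumes the instance
    # uses on that node, e.g. (placeholder values)
    #   {"node1.example.com": [<LVs of disk 0>, <LVs of disk 1>, ...]}
    # which is what the per-field "instance" lookup below relies on.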
2425

    
2426
    output = []
2427
    for node in nodenames:
2428
      nresult = volumes[node]
2429
      if nresult.offline:
2430
        continue
2431
      msg = nresult.fail_msg
2432
      if msg:
2433
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2434
        continue
2435

    
2436
      node_vols = nresult.payload[:]
2437
      node_vols.sort(key=lambda vol: vol['dev'])
2438

    
2439
      for vol in node_vols:
2440
        node_output = []
2441
        for field in self.op.output_fields:
2442
          if field == "node":
2443
            val = node
2444
          elif field == "phys":
2445
            val = vol['dev']
2446
          elif field == "vg":
2447
            val = vol['vg']
2448
          elif field == "name":
2449
            val = vol['name']
2450
          elif field == "size":
2451
            val = int(float(vol['size']))
2452
          elif field == "instance":
2453
            for inst in ilist:
2454
              if node not in lv_by_node[inst]:
2455
                continue
2456
              if vol['name'] in lv_by_node[inst][node]:
2457
                val = inst.name
2458
                break
2459
            else:
2460
              val = '-'
2461
          else:
2462
            raise errors.ParameterError(field)
2463
          node_output.append(str(val))
2464

    
2465
        output.append(node_output)
2466

    
2467
    return output
2468

    
2469

    
2470
class LUQueryNodeStorage(NoHooksLU):
2471
  """Logical unit for getting information on storage units on node(s).
2472

2473
  """
2474
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2475
  REQ_BGL = False
2476
  _FIELDS_STATIC = utils.FieldSet("node")
2477

    
2478
  def ExpandNames(self):
2479
    storage_type = self.op.storage_type
2480

    
2481
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2482
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2483

    
2484
    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]
2485

    
2486
    _CheckOutputFields(static=self._FIELDS_STATIC,
2487
                       dynamic=utils.FieldSet(*dynamic_fields),
2488
                       selected=self.op.output_fields)
2489

    
2490
    self.needed_locks = {}
2491
    self.share_locks[locking.LEVEL_NODE] = 1
2492

    
2493
    if self.op.nodes:
2494
      self.needed_locks[locking.LEVEL_NODE] = \
2495
        _GetWantedNodes(self, self.op.nodes)
2496
    else:
2497
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2498

    
2499
  def CheckPrereq(self):
2500
    """Check prerequisites.
2501

2502
    This checks that the fields required are valid output fields.
2503

2504
    """
2505
    self.op.name = getattr(self.op, "name", None)
2506

    
2507
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2508

    
2509
  def Exec(self, feedback_fn):
2510
    """Computes the list of nodes and their attributes.
2511

2512
    """
2513
    # Always get name to sort by
2514
    if constants.SF_NAME in self.op.output_fields:
2515
      fields = self.op.output_fields[:]
2516
    else:
2517
      fields = [constants.SF_NAME] + self.op.output_fields
2518

    
2519
    # Never ask for node as it's only known to the LU
2520
    while "node" in fields:
2521
      fields.remove("node")
2522

    
2523
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2524
    name_idx = field_idx[constants.SF_NAME]
2525

    
2526
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2527
    data = self.rpc.call_storage_list(self.nodes,
2528
                                      self.op.storage_type, st_args,
2529
                                      self.op.name, fields)
2530

    
2531
    result = []
2532

    
2533
    for node in utils.NiceSort(self.nodes):
2534
      nresult = data[node]
2535
      if nresult.offline:
2536
        continue
2537

    
2538
      msg = nresult.fail_msg
2539
      if msg:
2540
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2541
        continue
2542

    
2543
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2544

    
2545
      for name in utils.NiceSort(rows.keys()):
2546
        row = rows[name]
2547

    
2548
        out = []
2549

    
2550
        for field in self.op.output_fields:
2551
          if field == "node":
2552
            val = node
2553
          elif field in field_idx:
2554
            val = row[field_idx[field]]
2555
          else:
2556
            raise errors.ParameterError(field)
2557

    
2558
          out.append(val)
2559

    
2560
        result.append(out)
2561

    
2562
    return result
2563

    
2564

    
2565
class LUModifyNodeStorage(NoHooksLU):
2566
  """Logical unit for modifying a storage volume on a node.
2567

2568
  """
2569
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2570
  REQ_BGL = False
2571

    
2572
  def CheckArguments(self):
2573
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2574
    if node_name is None:
2575
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2576

    
2577
    self.op.node_name = node_name
2578

    
2579
    storage_type = self.op.storage_type
2580
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2581
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2582

    
2583
  def ExpandNames(self):
2584
    self.needed_locks = {
2585
      locking.LEVEL_NODE: self.op.node_name,
2586
      }
2587

    
2588
  def CheckPrereq(self):
2589
    """Check prerequisites.
2590

2591
    """
2592
    storage_type = self.op.storage_type
2593

    
2594
    try:
2595
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2596
    except KeyError:
2597
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2598
                                 " modified" % storage_type)
2599

    
2600
    diff = set(self.op.changes.keys()) - modifiable
2601
    if diff:
2602
      raise errors.OpPrereqError("The following fields can not be modified for"
2603
                                 " storage units of type '%s': %r" %
2604
                                 (storage_type, list(diff)))
2605

    
2606
  def Exec(self, feedback_fn):
2607
    """Computes the list of nodes and their attributes.
2608

2609
    """
2610
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2611
    result = self.rpc.call_storage_modify(self.op.node_name,
2612
                                          self.op.storage_type, st_args,
2613
                                          self.op.name, self.op.changes)
2614
    result.Raise("Failed to modify storage unit '%s' on %s" %
2615
                 (self.op.name, self.op.node_name))
2616

    
2617

    
2618
class LUAddNode(LogicalUnit):
2619
  """Logical unit for adding node to the cluster.
2620

2621
  """
2622
  HPATH = "node-add"
2623
  HTYPE = constants.HTYPE_NODE
2624
  _OP_REQP = ["node_name"]
2625

    
2626
  def BuildHooksEnv(self):
2627
    """Build hooks env.
2628

2629
    This will run on all nodes before, and on all nodes + the new node after.
2630

2631
    """
2632
    env = {
2633
      "OP_TARGET": self.op.node_name,
2634
      "NODE_NAME": self.op.node_name,
2635
      "NODE_PIP": self.op.primary_ip,
2636
      "NODE_SIP": self.op.secondary_ip,
2637
      }
2638
    nodes_0 = self.cfg.GetNodeList()
2639
    nodes_1 = nodes_0 + [self.op.node_name, ]
2640
    return env, nodes_0, nodes_1
2641

    
2642
  def CheckPrereq(self):
2643
    """Check prerequisites.
2644

2645
    This checks:
2646
     - the new node is not already in the config
2647
     - it is resolvable
2648
     - its parameters (single/dual homed) matches the cluster
2649

2650
    Any errors are signaled by raising errors.OpPrereqError.
2651

2652
    """
2653
    node_name = self.op.node_name
2654
    cfg = self.cfg
2655

    
2656
    dns_data = utils.HostInfo(node_name)
2657

    
2658
    node = dns_data.name
2659
    primary_ip = self.op.primary_ip = dns_data.ip
2660
    secondary_ip = getattr(self.op, "secondary_ip", None)
2661
    if secondary_ip is None:
2662
      secondary_ip = primary_ip
2663
    if not utils.IsValidIP(secondary_ip):
2664
      raise errors.OpPrereqError("Invalid secondary IP given")
2665
    self.op.secondary_ip = secondary_ip
2666

    
2667
    node_list = cfg.GetNodeList()
2668
    if not self.op.readd and node in node_list:
2669
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2670
                                 node)
2671
    elif self.op.readd and node not in node_list:
2672
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2673

    
2674
    for existing_node_name in node_list:
2675
      existing_node = cfg.GetNodeInfo(existing_node_name)
2676

    
2677
      if self.op.readd and node == existing_node_name:
2678
        if (existing_node.primary_ip != primary_ip or
2679
            existing_node.secondary_ip != secondary_ip):
2680
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2681
                                     " address configuration as before")
2682
        continue
2683

    
2684
      if (existing_node.primary_ip == primary_ip or
2685
          existing_node.secondary_ip == primary_ip or
2686
          existing_node.primary_ip == secondary_ip or
2687
          existing_node.secondary_ip == secondary_ip):
2688
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2689
                                   " existing node %s" % existing_node.name)
2690

    
2691
    # check that the type of the node (single versus dual homed) is the
2692
    # same as for the master
2693
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2694
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2695
    newbie_singlehomed = secondary_ip == primary_ip
2696
    if master_singlehomed != newbie_singlehomed:
2697
      if master_singlehomed:
2698
        raise errors.OpPrereqError("The master has no private ip but the"
2699
                                   " new node has one")
2700
      else:
2701
        raise errors.OpPrereqError("The master has a private ip but the"
2702
                                   " new node doesn't have one")
2703

    
2704
    # checks reachability
2705
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2706
      raise errors.OpPrereqError("Node not reachable by ping")
2707

    
2708
    if not newbie_singlehomed:
2709
      # check reachability from my secondary ip to newbie's secondary ip
2710
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2711
                           source=myself.secondary_ip):
2712
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2713
                                   " based ping to noded port")
2714

    
2715
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2716
    if self.op.readd:
2717
      exceptions = [node]
2718
    else:
2719
      exceptions = []
2720
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2721
    # the new node will increase mc_max with one, so:
2722
    mc_max = min(mc_max + 1, cp_size)
2723
    self.master_candidate = mc_now < mc_max
2724

    
2725
    if self.op.readd:
2726
      self.new_node = self.cfg.GetNodeInfo(node)
2727
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2728
    else:
2729
      self.new_node = objects.Node(name=node,
2730
                                   primary_ip=primary_ip,
2731
                                   secondary_ip=secondary_ip,
2732
                                   master_candidate=self.master_candidate,
2733
                                   offline=False, drained=False)
2734

    
2735
  def Exec(self, feedback_fn):
2736
    """Adds the new node to the cluster.
2737

2738
    """
2739
    new_node = self.new_node
2740
    node = new_node.name
2741

    
2742
    # for re-adds, reset the offline/drained/master-candidate flags;
2743
    # we need to reset here, otherwise offline would prevent RPC calls
2744
    # later in the procedure; this also means that if the re-add
2745
    # fails, we are left with a non-offlined, broken node
2746
    if self.op.readd:
2747
      new_node.drained = new_node.offline = False
2748
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2749
      # if we demote the node, we do cleanup later in the procedure
2750
      new_node.master_candidate = self.master_candidate
2751

    
2752
    # notify the user about any possible mc promotion
2753
    if new_node.master_candidate:
2754
      self.LogInfo("Node will be a master candidate")
2755

    
2756
    # check connectivity
2757
    result = self.rpc.call_version([node])[node]
2758
    result.Raise("Can't get version information from node %s" % node)
2759
    if constants.PROTOCOL_VERSION == result.payload:
2760
      logging.info("Communication to node %s fine, sw version %s match",
2761
                   node, result.payload)
2762
    else:
2763
      raise errors.OpExecError("Version mismatch master version %s,"
2764
                               " node version %s" %
2765
                               (constants.PROTOCOL_VERSION, result.payload))
2766

    
2767
    # setup ssh on node
2768
    logging.info("Copy ssh key to node %s", node)
2769
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2770
    keyarray = []
2771
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2772
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2773
                priv_key, pub_key]
2774

    
2775
    for i in keyfiles:
2776
      f = open(i, 'r')
2777
      try:
2778
        keyarray.append(f.read())
2779
      finally:
2780
        f.close()
2781

    
2782
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2783
                                    keyarray[2],
2784
                                    keyarray[3], keyarray[4], keyarray[5])
2785
    result.Raise("Cannot transfer ssh keys to the new node")
2786

    
2787
    # Add node to our /etc/hosts, and add key to known_hosts
2788
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2789
      utils.AddHostToEtcHosts(new_node.name)
2790

    
2791
    if new_node.secondary_ip != new_node.primary_ip:
2792
      result = self.rpc.call_node_has_ip_address(new_node.name,
2793
                                                 new_node.secondary_ip)
2794
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2795
                   prereq=True)
2796
      if not result.payload:
2797
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2798
                                 " you gave (%s). Please fix and re-run this"
2799
                                 " command." % new_node.secondary_ip)
2800

    
2801
    node_verify_list = [self.cfg.GetMasterNode()]
2802
    node_verify_param = {
2803
      'nodelist': [node],
2804
      # TODO: do a node-net-test as well?
2805
    }
2806

    
2807
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2808
                                       self.cfg.GetClusterName())
2809
    for verifier in node_verify_list:
2810
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2811
      nl_payload = result[verifier].payload['nodelist']
2812
      if nl_payload:
2813
        for failed in nl_payload:
2814
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2815
                      (verifier, nl_payload[failed]))
2816
        raise errors.OpExecError("ssh/hostname verification failed.")
2817

    
2818
    if self.op.readd:
2819
      _RedistributeAncillaryFiles(self)
2820
      self.context.ReaddNode(new_node)
2821
      # make sure we redistribute the config
2822
      self.cfg.Update(new_node)
2823
      # and make sure the new node will not have old files around
2824
      if not new_node.master_candidate:
2825
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2826
        msg = result.RemoteFailMsg()
2827
        if msg:
2828
          self.LogWarning("Node failed to demote itself from master"
2829
                          " candidate status: %s" % msg)
2830
    else:
2831
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2832
      self.context.AddNode(new_node)
2833

    
2834

    
2835
class LUSetNodeParams(LogicalUnit):
2836
  """Modifies the parameters of a node.
2837

2838
  """
2839
  HPATH = "node-modify"
2840
  HTYPE = constants.HTYPE_NODE
2841
  _OP_REQP = ["node_name"]
2842
  REQ_BGL = False
2843

    
2844
  def CheckArguments(self):
2845
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2846
    if node_name is None:
2847
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2848
    self.op.node_name = node_name
2849
    _CheckBooleanOpField(self.op, 'master_candidate')
2850
    _CheckBooleanOpField(self.op, 'offline')
2851
    _CheckBooleanOpField(self.op, 'drained')
2852
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2853
    if all_mods.count(None) == 3:
2854
      raise errors.OpPrereqError("Please pass at least one modification")
2855
    if all_mods.count(True) > 1:
2856
      raise errors.OpPrereqError("Can't set the node into more than one"
2857
                                 " state at the same time")
2858

    
2859
  def ExpandNames(self):
2860
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2861

    
2862
  def BuildHooksEnv(self):
2863
    """Build hooks env.
2864

2865
    This runs on the master node.
2866

2867
    """
2868
    env = {
2869
      "OP_TARGET": self.op.node_name,
2870
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2871
      "OFFLINE": str(self.op.offline),
2872
      "DRAINED": str(self.op.drained),
2873
      }
2874
    nl = [self.cfg.GetMasterNode(),
2875
          self.op.node_name]
2876
    return env, nl, nl
2877

    
2878
  def CheckPrereq(self):
2879
    """Check prerequisites.
2880

2881
    This only checks the instance list against the existing names.
2882

2883
    """
2884
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2885

    
2886
    if ((self.op.master_candidate == False or self.op.offline == True or
2887
         self.op.drained == True) and node.master_candidate):
2888
      # we will demote the node from master_candidate
2889
      if self.op.node_name == self.cfg.GetMasterNode():
2890
        raise errors.OpPrereqError("The master node has to be a"
2891
                                   " master candidate, online and not drained")
2892
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2893
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2894
      if num_candidates <= cp_size:
2895
        msg = ("Not enough master candidates (desired"
2896
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2897
        if self.op.force:
2898
          self.LogWarning(msg)
2899
        else:
2900
          raise errors.OpPrereqError(msg)
2901

    
2902
    if (self.op.master_candidate == True and
2903
        ((node.offline and not self.op.offline == False) or
2904
         (node.drained and not self.op.drained == False))):
2905
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2906
                                 " to master_candidate" % node.name)
2907

    
2908
    return
2909

    
2910
  def Exec(self, feedback_fn):
2911
    """Modifies a node.
2912

2913
    """
2914
    node = self.node
2915

    
2916
    result = []
2917
    changed_mc = False
2918

    
2919
    if self.op.offline is not None:
2920
      node.offline = self.op.offline
2921
      result.append(("offline", str(self.op.offline)))
2922
      if self.op.offline == True:
2923
        if node.master_candidate:
2924
          node.master_candidate = False
2925
          changed_mc = True
2926
          result.append(("master_candidate", "auto-demotion due to offline"))
2927
        if node.drained:
2928
          node.drained = False
2929
          result.append(("drained", "clear drained status due to offline"))
2930

    
2931
    if self.op.master_candidate is not None:
2932
      node.master_candidate = self.op.master_candidate
2933
      changed_mc = True
2934
      result.append(("master_candidate", str(self.op.master_candidate)))
2935
      if self.op.master_candidate == False:
2936
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2937
        msg = rrc.fail_msg
2938
        if msg:
2939
          self.LogWarning("Node failed to demote itself: %s" % msg)
2940

    
2941
    if self.op.drained is not None:
2942
      node.drained = self.op.drained
2943
      result.append(("drained", str(self.op.drained)))
2944
      if self.op.drained == True:
2945
        if node.master_candidate:
2946
          node.master_candidate = False
2947
          changed_mc = True
2948
          result.append(("master_candidate", "auto-demotion due to drain"))
2949
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2950
          msg = rrc.RemoteFailMsg()
2951
          if msg:
2952
            self.LogWarning("Node failed to demote itself: %s" % msg)
2953
        if node.offline:
2954
          node.offline = False
2955
          result.append(("offline", "clear offline status due to drain"))
2956

    
2957
    # this will trigger configuration file update, if needed
2958
    self.cfg.Update(node)
2959
    # this will trigger job queue propagation or cleanup
2960
    if changed_mc:
2961
      self.context.ReaddNode(node)
2962

    
2963
    return result
2964

    
2965

    
2966
class LUPowercycleNode(NoHooksLU):
2967
  """Powercycles a node.
2968

2969
  """
2970
  _OP_REQP = ["node_name", "force"]
2971
  REQ_BGL = False
2972

    
2973
  def CheckArguments(self):
2974
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2975
    if node_name is None:
2976
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2977
    self.op.node_name = node_name
2978
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
2979
      raise errors.OpPrereqError("The node is the master and the force"
2980
                                 " parameter was not set")
2981

    
2982
  def ExpandNames(self):
2983
    """Locking for PowercycleNode.
2984

2985
    This is a last-resort option and shouldn't block on other
2986
    jobs. Therefore, we grab no locks.
2987

2988
    """
2989
    self.needed_locks = {}
2990

    
2991
  def CheckPrereq(self):
2992
    """Check prerequisites.
2993

2994
    This LU has no prereqs.
2995

2996
    """
2997
    pass
2998

    
2999
  def Exec(self, feedback_fn):
3000
    """Reboots a node.
3001

3002
    """
3003
    result = self.rpc.call_node_powercycle(self.op.node_name,
3004
                                           self.cfg.GetHypervisorType())
3005
    result.Raise("Failed to schedule the reboot")
3006
    return result.payload
3007

    
3008

    
3009
class LUQueryClusterInfo(NoHooksLU):
3010
  """Query cluster configuration.
3011

3012
  """
3013
  _OP_REQP = []
3014
  REQ_BGL = False
3015

    
3016
  def ExpandNames(self):
3017
    self.needed_locks = {}
3018

    
3019
  def CheckPrereq(self):
3020
    """No prerequsites needed for this LU.
3021

3022
    """
3023
    pass
3024

    
3025
  def Exec(self, feedback_fn):
3026
    """Return cluster config.
3027

3028
    """
3029
    cluster = self.cfg.GetClusterInfo()
3030
    result = {
3031
      "software_version": constants.RELEASE_VERSION,
3032
      "protocol_version": constants.PROTOCOL_VERSION,
3033
      "config_version": constants.CONFIG_VERSION,
3034
      "os_api_version": max(constants.OS_API_VERSIONS),
3035
      "export_version": constants.EXPORT_VERSION,
3036
      "architecture": (platform.architecture()[0], platform.machine()),
3037
      "name": cluster.cluster_name,
3038
      "master": cluster.master_node,
3039
      "default_hypervisor": cluster.enabled_hypervisors[0],
3040
      "enabled_hypervisors": cluster.enabled_hypervisors,
3041
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3042
                        for hypervisor_name in cluster.enabled_hypervisors]),
3043
      "beparams": cluster.beparams,
3044
      "nicparams": cluster.nicparams,
3045
      "candidate_pool_size": cluster.candidate_pool_size,
3046
      "master_netdev": cluster.master_netdev,
3047
      "volume_group_name": cluster.volume_group_name,
3048
      "file_storage_dir": cluster.file_storage_dir,
3049
      "ctime": cluster.ctime,
3050
      "mtime": cluster.mtime,
3051
      }
3052

    
3053
    return result
3054

    
3055

    
3056
class LUQueryConfigValues(NoHooksLU):
3057
  """Return configuration values.
3058

3059
  """
3060
  _OP_REQP = []
3061
  REQ_BGL = False
3062
  _FIELDS_DYNAMIC = utils.FieldSet()
3063
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
3064

    
3065
  def ExpandNames(self):
3066
    self.needed_locks = {}
3067

    
3068
    _CheckOutputFields(static=self._FIELDS_STATIC,
3069
                       dynamic=self._FIELDS_DYNAMIC,
3070
                       selected=self.op.output_fields)
3071

    
3072
  def CheckPrereq(self):
3073
    """No prerequisites.
3074

3075
    """
3076
    pass
3077

    
3078
  def Exec(self, feedback_fn):
3079
    """Dump a representation of the cluster config to the standard output.
3080

3081
    """
3082
    values = []
3083
    for field in self.op.output_fields:
3084
      if field == "cluster_name":
3085
        entry = self.cfg.GetClusterName()
3086
      elif field == "master_node":
3087
        entry = self.cfg.GetMasterNode()
3088
      elif field == "drain_flag":
3089
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3090
      else:
3091
        raise errors.ParameterError(field)
3092
      values.append(entry)
3093
    return values
3094

    
3095

    
3096
class LUActivateInstanceDisks(NoHooksLU):
3097
  """Bring up an instance's disks.
3098

3099
  """
3100
  _OP_REQP = ["instance_name"]
3101
  REQ_BGL = False
3102

    
3103
  def ExpandNames(self):
3104
    self._ExpandAndLockInstance()
3105
    self.needed_locks[locking.LEVEL_NODE] = []
3106
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3107

    
3108
  def DeclareLocks(self, level):
3109
    if level == locking.LEVEL_NODE:
3110
      self._LockInstancesNodes()
3111

    
3112
  def CheckPrereq(self):
3113
    """Check prerequisites.
3114

3115
    This checks that the instance is in the cluster.
3116

3117
    """
3118
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3119
    assert self.instance is not None, \
3120
      "Cannot retrieve locked instance %s" % self.op.instance_name
3121
    _CheckNodeOnline(self, self.instance.primary_node)
3122
    if not hasattr(self.op, "ignore_size"):
3123
      self.op.ignore_size = False
3124

    
3125
  def Exec(self, feedback_fn):
3126
    """Activate the disks.
3127

3128
    """
3129
    disks_ok, disks_info = \
3130
              _AssembleInstanceDisks(self, self.instance,
3131
                                     ignore_size=self.op.ignore_size)
3132
    if not disks_ok:
3133
      raise errors.OpExecError("Cannot activate block devices")
3134

    
3135
    return disks_info
3136

    
3137

    
3138
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3139
                           ignore_size=False):
3140
  """Prepare the block devices for an instance.
3141

3142
  This sets up the block devices on all nodes.
3143

3144
  @type lu: L{LogicalUnit}
3145
  @param lu: the logical unit on whose behalf we execute
3146
  @type instance: L{objects.Instance}
3147
  @param instance: the instance for whose disks we assemble
3148
  @type ignore_secondaries: boolean
3149
  @param ignore_secondaries: if true, errors on secondary nodes
3150
      won't result in an error return from the function
3151
  @type ignore_size: boolean
3152
  @param ignore_size: if true, the current known size of the disk
3153
      will not be used during the disk activation, useful for cases
3154
      when the size is wrong
3155
  @return: False if the operation failed, otherwise a list of
3156
      (host, instance_visible_name, node_visible_name)
3157
      with the mapping from node devices to instance devices
3158

3159
  """
3160
  device_info = []
3161
  disks_ok = True
3162
  iname = instance.name
3163
  # With the two passes mechanism we try to reduce the window of
3164
  # opportunity for the race condition of switching DRBD to primary
3165
  # before handshaking occured, but we do not eliminate it
3166

    
3167
  # The proper fix would be to wait (with some limits) until the
3168
  # connection has been made and drbd transitions from WFConnection
3169
  # into any other network-connected state (Connected, SyncTarget,
3170
  # SyncSource, etc.)
3171

    
3172
  # 1st pass, assemble on all nodes in secondary mode
3173
  for inst_disk in instance.disks:
3174
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3175
      if ignore_size:
3176
        node_disk = node_disk.Copy()
3177
        node_disk.UnsetSize()
3178
      lu.cfg.SetDiskID(node_disk, node)
3179
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3180
      msg = result.fail_msg
3181
      if msg:
3182
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3183
                           " (is_primary=False, pass=1): %s",
3184
                           inst_disk.iv_name, node, msg)
3185
        if not ignore_secondaries:
3186
          disks_ok = False
3187

    
3188
  # FIXME: race condition on drbd migration to primary
3189

    
3190
  # 2nd pass, do only the primary node
3191
  for inst_disk in instance.disks:
3192
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3193
      if node != instance.primary_node:
3194
        continue
3195
      if ignore_size:
3196
        node_disk = node_disk.Copy()
3197
        node_disk.UnsetSize()
3198
      lu.cfg.SetDiskID(node_disk, node)
3199
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3200
      msg = result.fail_msg
3201
      if msg:
3202
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3203
                           " (is_primary=True, pass=2): %s",
3204
                           inst_disk.iv_name, node, msg)
3205
        disks_ok = False
3206
    device_info.append((instance.primary_node, inst_disk.iv_name,
3207
                        result.payload))
3208

    
3209
  # leave the disks configured for the primary node
3210
  # this is a workaround that would be fixed better by
3211
  # improving the logical/physical id handling
3212
  for disk in instance.disks:
3213
    lu.cfg.SetDiskID(disk, instance.primary_node)
3214

    
3215
  return disks_ok, device_info
3216

    
3217

    
3218
def _StartInstanceDisks(lu, instance, force):
3219
  """Start the disks of an instance.
3220

3221
  """
3222
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3223
                                           ignore_secondaries=force)
3224
  if not disks_ok:
3225
    _ShutdownInstanceDisks(lu, instance)
3226
    if force is not None and not force:
3227
      lu.proc.LogWarning("", hint="If the message above refers to a"
3228
                         " secondary node,"
3229
                         " you can retry the operation using '--force'.")
3230
    raise errors.OpExecError("Disk consistency error")
3231

    
3232

    
3233
class LUDeactivateInstanceDisks(NoHooksLU):
3234
  """Shutdown an instance's disks.
3235

3236
  """
3237
  _OP_REQP = ["instance_name"]
3238
  REQ_BGL = False
3239

    
3240
  def ExpandNames(self):
3241
    self._ExpandAndLockInstance()
3242
    self.needed_locks[locking.LEVEL_NODE] = []
3243
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3244

    
3245
  def DeclareLocks(self, level):
3246
    if level == locking.LEVEL_NODE:
3247
      self._LockInstancesNodes()
3248

    
3249
  def CheckPrereq(self):
3250
    """Check prerequisites.
3251

3252
    This checks that the instance is in the cluster.
3253

3254
    """
3255
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3256
    assert self.instance is not None, \
3257
      "Cannot retrieve locked instance %s" % self.op.instance_name
3258

    
3259
  def Exec(self, feedback_fn):
3260
    """Deactivate the disks
3261

3262
    """
3263
    instance = self.instance
3264
    _SafeShutdownInstanceDisks(self, instance)
3265

    
3266

    
3267
def _SafeShutdownInstanceDisks(lu, instance):
3268
  """Shutdown block devices of an instance.
3269

3270
  This function checks if an instance is running, before calling
3271
  _ShutdownInstanceDisks.
3272

3273
  """
3274
  pnode = instance.primary_node
3275
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3276
  ins_l.Raise("Can't contact node %s" % pnode)
3277

    
3278
  if instance.name in ins_l.payload:
3279
    raise errors.OpExecError("Instance is running, can't shutdown"
3280
                             " block devices.")
3281

    
3282
  _ShutdownInstanceDisks(lu, instance)
3283

    
3284

    
3285
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3286
  """Shutdown block devices of an instance.
3287

3288
  This does the shutdown on all nodes of the instance.
3289

3290
  If the ignore_primary is false, errors on the primary node are
3291
  ignored.
3292

3293
  """
3294
  all_result = True
3295
  for disk in instance.disks:
3296
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3297
      lu.cfg.SetDiskID(top_disk, node)
3298
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3299
      msg = result.fail_msg
3300
      if msg:
3301
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3302
                      disk.iv_name, node, msg)
3303
        if not ignore_primary or node != instance.primary_node:
3304
          all_result = False
3305
  return all_result
3306

    
3307

    
3308
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3309
  """Checks if a node has enough free memory.
3310

3311
  This function check if a given node has the needed amount of free
3312
  memory. In case the node has less memory or we cannot get the
3313
  information from the node, this function raise an OpPrereqError
3314
  exception.
3315

3316
  @type lu: C{LogicalUnit}
3317
  @param lu: a logical unit from which we get configuration data
3318
  @type node: C{str}
3319
  @param node: the node to check
3320
  @type reason: C{str}
3321
  @param reason: string to use in the error message
3322
  @type requested: C{int}
3323
  @param requested: the amount of memory in MiB to check for
3324
  @type hypervisor_name: C{str}
3325
  @param hypervisor_name: the hypervisor to ask for memory stats
3326
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3327
      we cannot check the node
3328

3329
  """
3330
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3331
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3332
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3333
  if not isinstance(free_mem, int):
3334
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3335
                               " was '%s'" % (node, free_mem))
3336
  if requested > free_mem:
3337
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3338
                               " needed %s MiB, available %s MiB" %
3339
                               (node, reason, requested, free_mem))
3340

    
3341

    
3342
class LUStartupInstance(LogicalUnit):
3343
  """Starts an instance.
3344

3345
  """
3346
  HPATH = "instance-start"
3347
  HTYPE = constants.HTYPE_INSTANCE
3348
  _OP_REQP = ["instance_name", "force"]
3349
  REQ_BGL = False
3350

    
3351
  def ExpandNames(self):
3352
    self._ExpandAndLockInstance()
3353

    
3354
  def BuildHooksEnv(self):
3355
    """Build hooks env.
3356

3357
    This runs on master, primary and secondary nodes of the instance.
3358

3359
    """
3360
    env = {
3361
      "FORCE": self.op.force,
3362
      }
3363
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3364
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3365
    return env, nl, nl
3366

    
3367
  def CheckPrereq(self):
3368
    """Check prerequisites.
3369

3370
    This checks that the instance is in the cluster.
3371

3372
    """
3373
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3374
    assert self.instance is not None, \
3375
      "Cannot retrieve locked instance %s" % self.op.instance_name
3376

    
3377
    # extra beparams
3378
    self.beparams = getattr(self.op, "beparams", {})
3379
    if self.beparams:
3380
      if not isinstance(self.beparams, dict):
3381
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3382
                                   " dict" % (type(self.beparams), ))
3383
      # fill the beparams dict
3384
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3385
      self.op.beparams = self.beparams
3386

    
3387
    # extra hvparams
3388
    self.hvparams = getattr(self.op, "hvparams", {})
3389
    if self.hvparams:
3390
      if not isinstance(self.hvparams, dict):
3391
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3392
                                   " dict" % (type(self.hvparams), ))
3393

    
3394
      # check hypervisor parameter syntax (locally)
3395
      cluster = self.cfg.GetClusterInfo()
3396
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3397
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3398
                                    instance.hvparams)
3399
      filled_hvp.update(self.hvparams)
3400
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3401
      hv_type.CheckParameterSyntax(filled_hvp)
3402
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3403
      self.op.hvparams = self.hvparams
3404

    
3405
    _CheckNodeOnline(self, instance.primary_node)
3406

    
3407
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3408
    # check bridges existence
3409
    _CheckInstanceBridgesExist(self, instance)
3410

    
3411
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3412
                                              instance.name,
3413
                                              instance.hypervisor)
3414
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3415
                      prereq=True)
3416
    if not remote_info.payload: # not running already
3417
      _CheckNodeFreeMemory(self, instance.primary_node,
3418
                           "starting instance %s" % instance.name,
3419
                           bep[constants.BE_MEMORY], instance.hypervisor)
3420

    
3421
  def Exec(self, feedback_fn):
3422
    """Start the instance.
3423

3424
    """
3425
    instance = self.instance
3426
    force = self.op.force
3427

    
3428
    self.cfg.MarkInstanceUp(instance.name)
3429

    
3430
    node_current = instance.primary_node
3431

    
3432
    _StartInstanceDisks(self, instance, force)
3433

    
3434
    result = self.rpc.call_instance_start(node_current, instance,
3435
                                          self.hvparams, self.beparams)
3436
    msg = result.fail_msg
3437
    if msg:
3438
      _ShutdownInstanceDisks(self, instance)
3439
      raise errors.OpExecError("Could not start instance: %s" % msg)
3440

    
3441

    
3442
class LURebootInstance(LogicalUnit):
3443
  """Reboot an instance.
3444

3445
  """
3446
  HPATH = "instance-reboot"
3447
  HTYPE = constants.HTYPE_INSTANCE
3448
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3449
  REQ_BGL = False
3450

    
3451
  def ExpandNames(self):
3452
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3453
                                   constants.INSTANCE_REBOOT_HARD,
3454
                                   constants.INSTANCE_REBOOT_FULL]:
3455
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3456
                                  (constants.INSTANCE_REBOOT_SOFT,
3457
                                   constants.INSTANCE_REBOOT_HARD,
3458
                                   constants.INSTANCE_REBOOT_FULL))
3459
    self._ExpandAndLockInstance()
3460

    
3461
  def BuildHooksEnv(self):
3462
    """Build hooks env.
3463

3464
    This runs on master, primary and secondary nodes of the instance.
3465

3466
    """
3467
    env = {
3468
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3469
      "REBOOT_TYPE": self.op.reboot_type,
3470
      }
3471
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3472
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3473
    return env, nl, nl
3474

    
3475
  def CheckPrereq(self):
3476
    """Check prerequisites.
3477

3478
    This checks that the instance is in the cluster.
3479

3480
    """
3481
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3482
    assert self.instance is not None, \
3483
      "Cannot retrieve locked instance %s" % self.op.instance_name
3484

    
3485
    _CheckNodeOnline(self, instance.primary_node)
3486

    
3487
    # check bridges existence
3488
    _CheckInstanceBridgesExist(self, instance)
3489

    
3490
  def Exec(self, feedback_fn):
3491
    """Reboot the instance.
3492

3493
    """
3494
    instance = self.instance
3495
    ignore_secondaries = self.op.ignore_secondaries
3496
    reboot_type = self.op.reboot_type
3497

    
3498
    node_current = instance.primary_node
3499

    
3500
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3501
                       constants.INSTANCE_REBOOT_HARD]:
3502
      for disk in instance.disks:
3503
        self.cfg.SetDiskID(disk, node_current)
3504
      result = self.rpc.call_instance_reboot(node_current, instance,
3505
                                             reboot_type)
3506
      result.Raise("Could not reboot instance")
3507
    else:
3508
      result = self.rpc.call_instance_shutdown(node_current, instance)
3509
      result.Raise("Could not shutdown instance for full reboot")
3510
      _ShutdownInstanceDisks(self, instance)
3511
      _StartInstanceDisks(self, instance, ignore_secondaries)
3512
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3513
      msg = result.fail_msg
3514
      if msg:
3515
        _ShutdownInstanceDisks(self, instance)
3516
        raise errors.OpExecError("Could not start instance for"
3517
                                 " full reboot: %s" % msg)
3518

    
3519
    self.cfg.MarkInstanceUp(instance.name)
3520

    
3521

    
3522
class LUShutdownInstance(LogicalUnit):
3523
  """Shutdown an instance.
3524

3525
  """
3526
  HPATH = "instance-stop"
3527
  HTYPE = constants.HTYPE_INSTANCE
3528
  _OP_REQP = ["instance_name"]
3529
  REQ_BGL = False
3530

    
3531
  def ExpandNames(self):
3532
    self._ExpandAndLockInstance()
3533

    
3534
  def BuildHooksEnv(self):
3535
    """Build hooks env.
3536

3537
    This runs on master, primary and secondary nodes of the instance.
3538

3539
    """
3540
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3541
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3542
    return env, nl, nl
3543

    
3544
  def CheckPrereq(self):
3545
    """Check prerequisites.
3546

3547
    This checks that the instance is in the cluster.
3548

3549
    """
3550
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3551
    assert self.instance is not None, \
3552
      "Cannot retrieve locked instance %s" % self.op.instance_name
3553
    _CheckNodeOnline(self, self.instance.primary_node)
3554

    
3555
  def Exec(self, feedback_fn):
3556
    """Shutdown the instance.
3557

3558
    """
3559
    instance = self.instance
3560
    node_current = instance.primary_node
3561
    self.cfg.MarkInstanceDown(instance.name)
3562
    result = self.rpc.call_instance_shutdown(node_current, instance)
3563
    msg = result.fail_msg
3564
    if msg:
3565
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3566

    
3567
    _ShutdownInstanceDisks(self, instance)
3568

    
3569

    
3570
class LUReinstallInstance(LogicalUnit):
3571
  """Reinstall an instance.
3572

3573
  """
3574
  HPATH = "instance-reinstall"
3575
  HTYPE = constants.HTYPE_INSTANCE
3576
  _OP_REQP = ["instance_name"]
3577
  REQ_BGL = False
3578

    
3579
  def ExpandNames(self):
3580
    self._ExpandAndLockInstance()
3581

    
3582
  def BuildHooksEnv(self):
3583
    """Build hooks env.
3584

3585
    This runs on master, primary and secondary nodes of the instance.
3586

3587
    """
3588
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3589
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3590
    return env, nl, nl
3591

    
3592
  def CheckPrereq(self):
3593
    """Check prerequisites.
3594

3595
    This checks that the instance is in the cluster and is not running.
3596

3597
    """
3598
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3599
    assert instance is not None, \
3600
      "Cannot retrieve locked instance %s" % self.op.instance_name
3601
    _CheckNodeOnline(self, instance.primary_node)
3602

    
3603
    if instance.disk_template == constants.DT_DISKLESS:
3604
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3605
                                 self.op.instance_name)
3606
    if instance.admin_up:
3607
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3608
                                 self.op.instance_name)
3609
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3610
                                              instance.name,
3611
                                              instance.hypervisor)
3612
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3613
                      prereq=True)
3614
    if remote_info.payload:
3615
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3616
                                 (self.op.instance_name,
3617
                                  instance.primary_node))
3618

    
3619
    self.op.os_type = getattr(self.op, "os_type", None)
3620
    if self.op.os_type is not None:
3621
      # OS verification
3622
      pnode = self.cfg.GetNodeInfo(
3623
        self.cfg.ExpandNodeName(instance.primary_node))
3624
      if pnode is None:
3625
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3626
                                   self.op.pnode)
3627
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3628
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3629
                   (self.op.os_type, pnode.name), prereq=True)
3630

    
3631
    self.instance = instance
3632

    
3633
  def Exec(self, feedback_fn):
3634
    """Reinstall the instance.
3635

3636
    """
3637
    inst = self.instance
3638

    
3639
    if self.op.os_type is not None:
3640
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3641
      inst.os = self.op.os_type
3642
      self.cfg.Update(inst)
3643

    
3644
    _StartInstanceDisks(self, inst, None)
3645
    try:
3646
      feedback_fn("Running the instance OS create scripts...")
3647
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3648
      result.Raise("Could not install OS for instance %s on node %s" %
3649
                   (inst.name, inst.primary_node))
3650
    finally:
3651
      _ShutdownInstanceDisks(self, inst)
3652

    
3653

    
3654
class LURecreateInstanceDisks(LogicalUnit):
3655
  """Recreate an instance's missing disks.
3656

3657
  """
3658
  HPATH = "instance-recreate-disks"
3659
  HTYPE = constants.HTYPE_INSTANCE
3660
  _OP_REQP = ["instance_name", "disks"]
3661
  REQ_BGL = False
3662

    
3663
  def CheckArguments(self):
3664
    """Check the arguments.
3665

3666
    """
3667
    if not isinstance(self.op.disks, list):
3668
      raise errors.OpPrereqError("Invalid disks parameter")
3669
    for item in self.op.disks:
3670
      if (not isinstance(item, int) or
3671
          item < 0):
3672
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3673
                                   str(item))
3674

    
3675
  def ExpandNames(self):
3676
    self._ExpandAndLockInstance()
3677

    
3678
  def BuildHooksEnv(self):
3679
    """Build hooks env.
3680

3681
    This runs on master, primary and secondary nodes of the instance.
3682

3683
    """
3684
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3685
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3686
    return env, nl, nl
3687

    
3688
  def CheckPrereq(self):
3689
    """Check prerequisites.
3690

3691
    This checks that the instance is in the cluster and is not running.
3692

3693
    """
3694
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3695
    assert instance is not None, \
3696
      "Cannot retrieve locked instance %s" % self.op.instance_name
3697
    _CheckNodeOnline(self, instance.primary_node)
3698

    
3699
    if instance.disk_template == constants.DT_DISKLESS:
3700
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3701
                                 self.op.instance_name)
3702
    if instance.admin_up:
3703
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3704
                                 self.op.instance_name)
3705
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3706
                                              instance.name,
3707
                                              instance.hypervisor)
3708
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3709
                      prereq=True)
3710
    if remote_info.payload:
3711
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3712
                                 (self.op.instance_name,
3713
                                  instance.primary_node))
3714

    
3715
    if not self.op.disks:
3716
      self.op.disks = range(len(instance.disks))
3717
    else:
3718
      for idx in self.op.disks:
3719
        if idx >= len(instance.disks):
3720
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
3721

    
3722
    self.instance = instance
3723

    
3724
  def Exec(self, feedback_fn):
3725
    """Recreate the disks.
3726

3727
    """
3728
    to_skip = []
3729
    for idx, disk in enumerate(self.instance.disks):
3730
      if idx not in self.op.disks: # disk idx has not been passed in
3731
        to_skip.append(idx)
3732
        continue
3733

    
3734
    _CreateDisks(self, self.instance, to_skip=to_skip)
3735

    
3736

    
3737
class LURenameInstance(LogicalUnit):
3738
  """Rename an instance.
3739

3740
  """
3741
  HPATH = "instance-rename"
3742
  HTYPE = constants.HTYPE_INSTANCE
3743
  _OP_REQP = ["instance_name", "new_name"]
3744

    
3745
  def BuildHooksEnv(self):
3746
    """Build hooks env.
3747

3748
    This runs on master, primary and secondary nodes of the instance.
3749

3750
    """
3751
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3752
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3753
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3754
    return env, nl, nl
3755

    
3756
  def CheckPrereq(self):
3757
    """Check prerequisites.
3758

3759
    This checks that the instance is in the cluster and is not running.
3760

3761
    """
3762
    instance = self.cfg.GetInstanceInfo(
3763
      self.cfg.ExpandInstanceName(self.op.instance_name))
3764
    if instance is None:
3765
      raise errors.OpPrereqError("Instance '%s' not known" %
3766
                                 self.op.instance_name)
3767
    _CheckNodeOnline(self, instance.primary_node)
3768

    
3769
    if instance.admin_up:
3770
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3771
                                 self.op.instance_name)
3772
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3773
                                              instance.name,
3774
                                              instance.hypervisor)
3775
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3776
                      prereq=True)
3777
    if remote_info.payload:
3778
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3779
                                 (self.op.instance_name,
3780
                                  instance.primary_node))
3781
    self.instance = instance
3782

    
3783
    # new name verification
3784
    name_info = utils.HostInfo(self.op.new_name)
3785

    
3786
    self.op.new_name = new_name = name_info.name
3787
    instance_list = self.cfg.GetInstanceList()
3788
    if new_name in instance_list:
3789
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3790
                                 new_name)
3791

    
3792
    if not getattr(self.op, "ignore_ip", False):
3793
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3794
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3795
                                   (name_info.ip, new_name))
3796

    
3797

    
3798
  def Exec(self, feedback_fn):
3799
    """Reinstall the instance.
3800

3801
    """
3802
    inst = self.instance
3803
    old_name = inst.name
3804

    
3805
    if inst.disk_template == constants.DT_FILE:
3806
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3807

    
3808
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3809
    # Change the instance lock. This is definitely safe while we hold the BGL
3810
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3811
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3812

    
3813
    # re-read the instance from the configuration after rename
3814
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3815

    
3816
    if inst.disk_template == constants.DT_FILE:
3817
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3818
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3819
                                                     old_file_storage_dir,
3820
                                                     new_file_storage_dir)
3821
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3822
                   " (but the instance has been renamed in Ganeti)" %
3823
                   (inst.primary_node, old_file_storage_dir,
3824
                    new_file_storage_dir))
3825

    
3826
    _StartInstanceDisks(self, inst, None)
3827
    try:
3828
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3829
                                                 old_name)
3830
      msg = result.fail_msg
3831
      if msg:
3832
        msg = ("Could not run OS rename script for instance %s on node %s"
3833
               " (but the instance has been renamed in Ganeti): %s" %
3834
               (inst.name, inst.primary_node, msg))
3835
        self.proc.LogWarning(msg)
3836
    finally:
3837
      _ShutdownInstanceDisks(self, inst)
3838

    
3839

    
3840
class LURemoveInstance(LogicalUnit):
3841
  """Remove an instance.
3842

3843
  """
3844
  HPATH = "instance-remove"
3845
  HTYPE = constants.HTYPE_INSTANCE
3846
  _OP_REQP = ["instance_name", "ignore_failures"]
3847
  REQ_BGL = False
3848

    
3849
  def ExpandNames(self):
3850
    self._ExpandAndLockInstance()
3851
    self.needed_locks[locking.LEVEL_NODE] = []
3852
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3853

    
3854
  def DeclareLocks(self, level):
3855
    if level == locking.LEVEL_NODE:
3856
      self._LockInstancesNodes()
3857

    
3858
  def BuildHooksEnv(self):
3859
    """Build hooks env.
3860

3861
    This runs on master, primary and secondary nodes of the instance.
3862

3863
    """
3864
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3865
    nl = [self.cfg.GetMasterNode()]
3866
    return env, nl, nl
3867

    
3868
  def CheckPrereq(self):
3869
    """Check prerequisites.
3870

3871
    This checks that the instance is in the cluster.
3872

3873
    """
3874
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3875
    assert self.instance is not None, \
3876
      "Cannot retrieve locked instance %s" % self.op.instance_name
3877

    
3878
  def Exec(self, feedback_fn):
3879
    """Remove the instance.
3880

3881
    """
3882
    instance = self.instance
3883
    logging.info("Shutting down instance %s on node %s",
3884
                 instance.name, instance.primary_node)
3885

    
3886
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3887
    msg = result.fail_msg
3888
    if msg:
3889
      if self.op.ignore_failures:
3890
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3891
      else:
3892
        raise errors.OpExecError("Could not shutdown instance %s on"
3893
                                 " node %s: %s" %
3894
                                 (instance.name, instance.primary_node, msg))
3895

    
3896
    logging.info("Removing block devices for instance %s", instance.name)
3897

    
3898
    if not _RemoveDisks(self, instance):
3899
      if self.op.ignore_failures:
3900
        feedback_fn("Warning: can't remove instance's disks")
3901
      else:
3902
        raise errors.OpExecError("Can't remove instance's disks")
3903

    
3904
    logging.info("Removing instance %s out of cluster config", instance.name)
3905

    
3906
    self.cfg.RemoveInstance(instance.name)
3907
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3908

    
3909

    
3910
class LUQueryInstances(NoHooksLU):
3911
  """Logical unit for querying instances.
3912

3913
  """
3914
  _OP_REQP = ["output_fields", "names", "use_locking"]
3915
  REQ_BGL = False
3916
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3917
                                    "admin_state",
3918
                                    "disk_template", "ip", "mac", "bridge",
3919
                                    "nic_mode", "nic_link",
3920
                                    "sda_size", "sdb_size", "vcpus", "tags",
3921
                                    "network_port", "beparams",
3922
                                    r"(disk)\.(size)/([0-9]+)",
3923
                                    r"(disk)\.(sizes)", "disk_usage",
3924
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3925
                                    r"(nic)\.(bridge)/([0-9]+)",
3926
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
3927
                                    r"(disk|nic)\.(count)",
3928
                                    "serial_no", "hypervisor", "hvparams",
3929
                                    "ctime", "mtime",
3930
                                    ] +
3931
                                  ["hv/%s" % name
3932
                                   for name in constants.HVS_PARAMETERS] +
3933
                                  ["be/%s" % name
3934
                                   for name in constants.BES_PARAMETERS])
3935
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3936

    
3937

    
3938
  def ExpandNames(self):
3939
    _CheckOutputFields(static=self._FIELDS_STATIC,
3940
                       dynamic=self._FIELDS_DYNAMIC,
3941
                       selected=self.op.output_fields)
3942

    
3943
    self.needed_locks = {}
3944
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3945
    self.share_locks[locking.LEVEL_NODE] = 1
3946

    
3947
    if self.op.names:
3948
      self.wanted = _GetWantedInstances(self, self.op.names)
3949
    else:
3950
      self.wanted = locking.ALL_SET
3951

    
3952
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3953
    self.do_locking = self.do_node_query and self.op.use_locking
3954
    if self.do_locking:
3955
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3956
      self.needed_locks[locking.LEVEL_NODE] = []
3957
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3958

    
3959
  def DeclareLocks(self, level):
3960
    if level == locking.LEVEL_NODE and self.do_locking:
3961
      self._LockInstancesNodes()
3962

    
3963
  def CheckPrereq(self):
3964
    """Check prerequisites.
3965

3966
    """
3967
    pass
3968

    
3969
  def Exec(self, feedback_fn):
3970
    """Computes the list of nodes and their attributes.
3971

3972
    """
3973
    all_info = self.cfg.GetAllInstancesInfo()
3974
    if self.wanted == locking.ALL_SET:
3975
      # caller didn't specify instance names, so ordering is not important
3976
      if self.do_locking:
3977
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
3978
      else:
3979
        instance_names = all_info.keys()
3980
      instance_names = utils.NiceSort(instance_names)
3981
    else:
3982
      # caller did specify names, so we must keep the ordering
3983
      if self.do_locking:
3984
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
3985
      else:
3986
        tgt_set = all_info.keys()
3987
      missing = set(self.wanted).difference(tgt_set)
3988
      if missing:
3989
        raise errors.OpExecError("Some instances were removed before"
3990
                                 " retrieving their data: %s" % missing)
3991
      instance_names = self.wanted
3992

    
3993
    instance_list = [all_info[iname] for iname in instance_names]
3994

    
3995
    # begin data gathering
3996

    
3997
    nodes = frozenset([inst.primary_node for inst in instance_list])
3998
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
3999

    
4000
    bad_nodes = []
4001
    off_nodes = []
4002
    if self.do_node_query:
4003
      live_data = {}
4004
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4005
      for name in nodes:
4006
        result = node_data[name]
4007
        if result.offline:
4008
          # offline nodes will be in both lists
4009
          off_nodes.append(name)
4010
        if result.failed or result.fail_msg:
4011
          bad_nodes.append(name)
4012
        else:
4013
          if result.payload:
4014
            live_data.update(result.payload)
4015
          # else no instance is alive
4016
    else:
4017
      live_data = dict([(name, {}) for name in instance_names])
4018

    
4019
    # end data gathering
4020

    
4021
    HVPREFIX = "hv/"
4022
    BEPREFIX = "be/"
4023
    output = []
4024
    cluster = self.cfg.GetClusterInfo()
4025
    for instance in instance_list:
4026
      iout = []
4027
      i_hv = cluster.FillHV(instance)
4028
      i_be = cluster.FillBE(instance)
4029
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4030
                                 nic.nicparams) for nic in instance.nics]
4031
      for field in self.op.output_fields:
4032
        st_match = self._FIELDS_STATIC.Matches(field)
4033
        if field == "name":
4034
          val = instance.name
4035
        elif field == "os":
4036
          val = instance.os
4037
        elif field == "pnode":
4038
          val = instance.primary_node
4039
        elif field == "snodes":
4040
          val = list(instance.secondary_nodes)
4041
        elif field == "admin_state":
4042
          val = instance.admin_up
4043
        elif field == "oper_state":
4044
          if instance.primary_node in bad_nodes:
4045
            val = None
4046
          else:
4047
            val = bool(live_data.get(instance.name))
4048
        elif field == "status":
4049
          if instance.primary_node in off_nodes:
4050
            val = "ERROR_nodeoffline"
4051
          elif instance.primary_node in bad_nodes:
4052
            val = "ERROR_nodedown"
4053
          else:
4054
            running = bool(live_data.get(instance.name))
4055
            if running:
4056
              if instance.admin_up:
4057
                val = "running"
4058
              else:
4059
                val = "ERROR_up"
4060
            else:
4061
              if instance.admin_up:
4062
                val = "ERROR_down"
4063
              else:
4064
                val = "ADMIN_down"
4065
        elif field == "oper_ram":
4066
          if instance.primary_node in bad_nodes:
4067
            val = None
4068
          elif instance.name in live_data:
4069
            val = live_data[instance.name].get("memory", "?")
4070
          else:
4071
            val = "-"
4072
        elif field == "vcpus":
4073
          val = i_be[constants.BE_VCPUS]
4074
        elif field == "disk_template":
4075
          val = instance.disk_template
4076
        elif field == "ip":
4077
          if instance.nics:
4078
            val = instance.nics[0].ip
4079
          else:
4080
            val = None
4081
        elif field == "nic_mode":
4082
          if instance.nics:
4083
            val = i_nicp[0][constants.NIC_MODE]
4084
          else:
4085
            val = None
4086
        elif field == "nic_link":
4087
          if instance.nics:
4088
            val = i_nicp[0][constants.NIC_LINK]
4089
          else:
4090
            val = None
4091
        elif field == "bridge":
4092
          if (instance.nics and
4093
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4094
            val = i_nicp[0][constants.NIC_LINK]
4095
          else:
4096
            val = None
4097
        elif field == "mac":
4098
          if instance.nics:
4099
            val = instance.nics[0].mac
4100
          else:
4101
            val = None
4102
        elif field == "sda_size" or field == "sdb_size":
4103
          idx = ord(field[2]) - ord('a')
4104
          try:
4105
            val = instance.FindDisk(idx).size
4106
          except errors.OpPrereqError:
4107
            val = None
4108
        elif field == "disk_usage": # total disk usage per node
4109
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4110
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4111
        elif field == "tags":
4112
          val = list(instance.GetTags())
4113
        elif field == "serial_no":
4114
          val = instance.serial_no
4115
        elif field == "ctime":
4116
          val = instance.ctime
4117
        elif field == "mtime":
4118
          val = instance.mtime
4119
        elif field == "network_port":
4120
          val = instance.network_port
4121
        elif field == "hypervisor":
4122
          val = instance.hypervisor
4123
        elif field == "hvparams":
4124
          val = i_hv
4125
        elif (field.startswith(HVPREFIX) and
4126
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4127
          val = i_hv.get(field[len(HVPREFIX):], None)
4128
        elif field == "beparams":
4129
          val = i_be
4130
        elif (field.startswith(BEPREFIX) and
4131
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4132
          val = i_be.get(field[len(BEPREFIX):], None)
4133
        elif st_match and st_match.groups():
4134
          # matches a variable list
4135
          st_groups = st_match.groups()
4136
          if st_groups and st_groups[0] == "disk":
4137
            if st_groups[1] == "count":
4138
              val = len(instance.disks)
4139
            elif st_groups[1] == "sizes":
4140
              val = [disk.size for disk in instance.disks]
4141
            elif st_groups[1] == "size":
4142
              try:
4143
                val = instance.FindDisk(st_groups[2]).size
4144
              except errors.OpPrereqError:
4145
                val = None
4146
            else:
4147
              assert False, "Unhandled disk parameter"
4148
          elif st_groups[0] == "nic":
4149
            if st_groups[1] == "count":
4150
              val = len(instance.nics)
4151
            elif st_groups[1] == "macs":
4152
              val = [nic.mac for nic in instance.nics]
4153
            elif st_groups[1] == "ips":
4154
              val = [nic.ip for nic in instance.nics]
4155
            elif st_groups[1] == "modes":
4156
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4157
            elif st_groups[1] == "links":
4158
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4159
            elif st_groups[1] == "bridges":
4160
              val = []
4161
              for nicp in i_nicp:
4162
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4163
                  val.append(nicp[constants.NIC_LINK])
4164
                else:
4165
                  val.append(None)
4166
            else:
4167
              # index-based item
4168
              nic_idx = int(st_groups[2])
4169
              if nic_idx >= len(instance.nics):
4170
                val = None
4171
              else:
4172
                if st_groups[1] == "mac":
4173
                  val = instance.nics[nic_idx].mac
4174
                elif st_groups[1] == "ip":
4175
                  val = instance.nics[nic_idx].ip
4176
                elif st_groups[1] == "mode":
4177
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4178
                elif st_groups[1] == "link":
4179
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4180
                elif st_groups[1] == "bridge":
4181
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4182
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4183
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4184
                  else:
4185
                    val = None
4186
                else:
4187
                  assert False, "Unhandled NIC parameter"
4188
          else:
4189
            assert False, ("Declared but unhandled variable parameter '%s'" %
4190
                           field)
4191
        else:
4192
          assert False, "Declared but unhandled parameter '%s'" % field
4193
        iout.append(val)
4194
      output.append(iout)
4195

    
4196
    return output
4197

    
4198

    
4199
class LUFailoverInstance(LogicalUnit):
4200
  """Failover an instance.
4201

4202
  """
4203
  HPATH = "instance-failover"
4204
  HTYPE = constants.HTYPE_INSTANCE
4205
  _OP_REQP = ["instance_name", "ignore_consistency"]
4206
  REQ_BGL = False
4207

    
4208
  def ExpandNames(self):
4209
    self._ExpandAndLockInstance()
4210
    self.needed_locks[locking.LEVEL_NODE] = []
4211
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4212

    
4213
  def DeclareLocks(self, level):
4214
    if level == locking.LEVEL_NODE:
4215
      self._LockInstancesNodes()
4216

    
4217
  def BuildHooksEnv(self):
4218
    """Build hooks env.
4219

4220
    This runs on master, primary and secondary nodes of the instance.
4221

4222
    """
4223
    env = {
4224
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4225
      }
4226
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4227
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4228
    return env, nl, nl
4229

    
4230
  def CheckPrereq(self):
4231
    """Check prerequisites.
4232

4233
    This checks that the instance is in the cluster.
4234

4235
    """
4236
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4237
    assert self.instance is not None, \
4238
      "Cannot retrieve locked instance %s" % self.op.instance_name
4239

    
4240
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4241
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4242
      raise errors.OpPrereqError("Instance's disk layout is not"
4243
                                 " network mirrored, cannot failover.")
4244

    
4245
    secondary_nodes = instance.secondary_nodes
4246
    if not secondary_nodes:
4247
      raise errors.ProgrammerError("no secondary node but using "
4248
                                   "a mirrored disk template")
4249

    
4250
    target_node = secondary_nodes[0]
4251
    _CheckNodeOnline(self, target_node)
4252
    _CheckNodeNotDrained(self, target_node)
4253
    if instance.admin_up:
4254
      # check memory requirements on the secondary node
4255
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return env, nl, nl


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore errors here, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migration is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      the CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results
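
# Illustrative example (the exact shape of the unique id is up to
# ConfigWriter.GenerateUniqueID and is an assumption here):
#
#   _GenerateUniqueNames(lu, [".disk0", ".disk1"])
#   -> ["<unique-id-1>.disk0", "<unique-id-2>.disk1"]
#
# _GenerateDiskTemplate below derives the "<name>_data"/"<name>_meta" LV
# pairs for DRBD from names generated this way.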


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
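
# The returned object is a small device tree (sizes in MiB, taken from the
# constructor calls above):
#
#   DRBD8(size)
#     +- LV names[0] (size)   data volume
#     +- LV names[1] (128)    metadata volume
#
# with logical_id carrying (primary, secondary, port, p_minor, s_minor,
# shared_secret) for the DRBD device itself.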
4860

    
4861

    
4862
def _GenerateDiskTemplate(lu, template_name,
4863
                          instance_name, primary_node,
4864
                          secondary_nodes, disk_info,
4865
                          file_storage_dir, file_driver,
4866
                          base_index):
4867
  """Generate the entire disk layout for a given template type.
4868

4869
  """
4870
  #TODO: compute space requirements
4871

    
4872
  vgname = lu.cfg.GetVGName()
4873
  disk_count = len(disk_info)
4874
  disks = []
4875
  if template_name == constants.DT_DISKLESS:
4876
    pass
4877
  elif template_name == constants.DT_PLAIN:
4878
    if len(secondary_nodes) != 0:
4879
      raise errors.ProgrammerError("Wrong template configuration")
4880

    
4881
    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
4882
                                      for i in range(disk_count)])
4883
    for idx, disk in enumerate(disk_info):
4884
      disk_index = idx + base_index
4885
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
4886
                              logical_id=(vgname, names[idx]),
4887
                              iv_name="disk/%d" % disk_index,
4888
                              mode=disk["mode"])
4889
      disks.append(disk_dev)
4890
  elif template_name == constants.DT_DRBD8:
4891
    if len(secondary_nodes) != 1:
4892
      raise errors.ProgrammerError("Wrong template configuration")
4893
    remote_node = secondary_nodes[0]
4894
    minors = lu.cfg.AllocateDRBDMinor(
4895
      [primary_node, remote_node] * len(disk_info), instance_name)
4896

    
4897
    names = []
4898
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
4899
                                               for i in range(disk_count)]):
4900
      names.append(lv_prefix + "_data")
4901
      names.append(lv_prefix + "_meta")
4902
    for idx, disk in enumerate(disk_info):
4903
      disk_index = idx + base_index
4904
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
4905
                                      disk["size"], names[idx*2:idx*2+2],
4906
                                      "disk/%d" % disk_index,
4907
                                      minors[idx*2], minors[idx*2+1])
4908
      disk_dev.mode = disk["mode"]
4909
      disks.append(disk_dev)
4910
  elif template_name == constants.DT_FILE:
4911
    if len(secondary_nodes) != 0:
4912
      raise errors.ProgrammerError("Wrong template configuration")
4913

    
4914
    for idx, disk in enumerate(disk_info):
4915
      disk_index = idx + base_index
4916
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
4917
                              iv_name="disk/%d" % disk_index,
4918
                              logical_id=(file_driver,
4919
                                          "%s/disk%d" % (file_storage_dir,
4920
                                                         disk_index)),
4921
                              mode=disk["mode"])
4922
      disks.append(disk_dev)
4923
  else:
4924
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
4925
  return disks
4926

    
4927

    
4928
def _GetInstanceInfoText(instance):
4929
  """Compute that text that should be added to the disk's metadata.
4930

4931
  """
4932
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
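      # Added note: f_create doubles as force_create and force_open here, so
      # the device is unconditionally created and opened on the primary node,
      # while on secondaries only device types that request it via
      # CreateOnSecondary() are created, and they are left unopened.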


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group.

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)

    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
    if self.op.start and not self.op.ip_check:
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
                                 " adding an instance in start mode")

    if self.op.ip_check:
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                   (self.check_ip, self.op.instance_name))

    #### mac address generation
    # By generating here the mac address both the allocator and the hooks get
    # the real final mac address rather than the 'auto' or 'generate' value.
    # There is a race condition between the generation and the instance object
    # creation, which means that we know the mac is valid now, but we're not
    # sure it will be when we actually add the instance. If things go bad
    # adding the instance will abort because of a duplicate mac, and the
    # creation job will fail.
    for nic in self.nics:
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        nic.mac = self.cfg.GenerateMAC()

    #### allocator run

    if self.op.iallocator is not None:
      self._RunAllocator()

    #### node related checks

    # check primary node
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
    assert self.pnode is not None, \
      "Cannot retrieve locked node %s" % self.op.pnode
    if pnode.offline:
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
                                 pnode.name)
    if pnode.drained:
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
                                 pnode.name)

    self.secondaries = []

    # mirror node verification
    if self.op.disk_template in constants.DTS_NET_MIRROR:
      if self.op.snode is None:
        raise errors.OpPrereqError("The networked disk templates need"
                                   " a mirror node")
      if self.op.snode == pnode.name:
        raise errors.OpPrereqError("The secondary node cannot be"
                                   " the primary node.")
      _CheckNodeOnline(self, self.op.snode)
      _CheckNodeNotDrained(self, self.op.snode)
      self.secondaries.append(self.op.snode)

    nodenames = [pnode.name] + self.secondaries

    req_size = _ComputeDiskSize(self.op.disk_template,
                                self.disks)

    # Check lv size requirements
    if req_size is not None:
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                         self.op.hypervisor)
      for node in nodenames:
        info = nodeinfo[node]
        info.Raise("Cannot get current information from node %s" % node)
        info = info.payload
        vg_free = info.get('vg_free', None)
        if not isinstance(vg_free, int):
          raise errors.OpPrereqError("Can't compute free disk space on"
                                     " node %s" % node)
        if req_size > vg_free:
          raise errors.OpPrereqError("Not enough disk space on target node %s."
                                     " %d MB available, %d MB required" %
                                     (node, vg_free, req_size))

    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)

    # os verification
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
    result.Raise("OS '%s' not in supported os list for primary node %s" %
                 (self.op.os_type, pnode.name), prereq=True)

    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)

    # memory check on primary node
    if self.op.start:
      _CheckNodeFreeMemory(self, self.pnode.name,
                           "creating instance %s" % self.op.instance_name,
                           self.be_full[constants.BE_MEMORY],
                           self.op.hypervisor)

    self.dry_run_result = list(nodenames)

  def Exec(self, feedback_fn):
5527
    """Create and add the instance to the cluster.
5528

5529
    """
5530
    instance = self.op.instance_name
5531
    pnode_name = self.pnode.name
5532

    
5533
    ht_kind = self.op.hypervisor
5534
    if ht_kind in constants.HTS_REQ_PORT:
5535
      network_port = self.cfg.AllocatePort()
5536
    else:
5537
      network_port = None
5538

    
5539
    ##if self.op.vnc_bind_address is None:
5540
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
5541

    
5542
    # this is needed because os.path.join does not accept None arguments
5543
    if self.op.file_storage_dir is None:
5544
      string_file_storage_dir = ""
5545
    else:
5546
      string_file_storage_dir = self.op.file_storage_dir
5547

    
5548
    # build the full file storage dir path
5549
    file_storage_dir = os.path.normpath(os.path.join(
5550
                                        self.cfg.GetFileStorageDir(),
5551
                                        string_file_storage_dir, instance))
5552

    
5553

    
5554
    disks = _GenerateDiskTemplate(self,
5555
                                  self.op.disk_template,
5556
                                  instance, pnode_name,
5557
                                  self.secondaries,
5558
                                  self.disks,
5559
                                  file_storage_dir,
5560
                                  self.op.file_driver,
5561
                                  0)
5562

    
5563
    iobj = objects.Instance(name=instance, os=self.op.os_type,
5564
                            primary_node=pnode_name,
5565
                            nics=self.nics, disks=disks,
5566
                            disk_template=self.op.disk_template,
5567
                            admin_up=False,
5568
                            network_port=network_port,
5569
                            beparams=self.op.beparams,
5570
                            hvparams=self.op.hvparams,
5571
                            hypervisor=self.op.hypervisor,
5572
                            )
5573

    
5574
    feedback_fn("* creating instance disks...")
5575
    try:
5576
      _CreateDisks(self, iobj)
5577
    except errors.OpExecError:
5578
      self.LogWarning("Device creation failed, reverting...")
5579
      try:
5580
        _RemoveDisks(self, iobj)
5581
      finally:
5582
        self.cfg.ReleaseDRBDMinors(instance)
5583
        raise
5584

    
5585
    feedback_fn("adding instance %s to cluster config" % instance)
5586

    
5587
    self.cfg.AddInstance(iobj)
5588
    # Declare that we don't want to remove the instance lock anymore, as we've
5589
    # added the instance to the config
5590
    del self.remove_locks[locking.LEVEL_INSTANCE]
5591
    # Unlock all the nodes
5592
    if self.op.mode == constants.INSTANCE_IMPORT:
5593
      nodes_keep = [self.op.src_node]
5594
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5595
                       if node != self.op.src_node]
5596
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
5597
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5598
    else:
5599
      self.context.glm.release(locking.LEVEL_NODE)
5600
      del self.acquired_locks[locking.LEVEL_NODE]
5601

    
5602
    if self.op.wait_for_sync:
5603
      disk_abort = not _WaitForSync(self, iobj)
5604
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
5605
      # make sure the disks are not degraded (still sync-ing is ok)
5606
      time.sleep(15)
5607
      feedback_fn("* checking mirrors status")
5608
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
5609
    else:
5610
      disk_abort = False
5611

    
5612
    if disk_abort:
5613
      _RemoveDisks(self, iobj)
5614
      self.cfg.RemoveInstance(iobj.name)
5615
      # Make sure the instance lock gets removed
5616
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5617
      raise errors.OpExecError("There are some degraded disks for"
5618
                               " this instance")
5619

    
5620
    feedback_fn("creating os for instance %s on node %s" %
5621
                (instance, pnode_name))
5622

    
5623
    if iobj.disk_template != constants.DT_DISKLESS:
5624
      if self.op.mode == constants.INSTANCE_CREATE:
5625
        feedback_fn("* running the instance OS create scripts...")
5626
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
5627
        result.Raise("Could not add os for instance %s"
5628
                     " on node %s" % (instance, pnode_name))
5629

    
5630
      elif self.op.mode == constants.INSTANCE_IMPORT:
5631
        feedback_fn("* running the instance OS import scripts...")
5632
        src_node = self.op.src_node
5633
        src_images = self.src_images
5634
        cluster_name = self.cfg.GetClusterName()
5635
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5636
                                                         src_node, src_images,
5637
                                                         cluster_name)
5638
        msg = import_result.fail_msg
5639
        if msg:
5640
          self.LogWarning("Error while importing the disk images for instance"
5641
                          " %s on node %s: %s" % (instance, pnode_name, msg))
5642
      else:
5643
        # also checked in the prereq part
5644
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5645
                                     % self.op.mode)
5646

    
5647
    if self.op.start:
5648
      iobj.admin_up = True
5649
      self.cfg.Update(iobj)
5650
      logging.info("Starting instance %s on node %s", instance, pnode_name)
5651
      feedback_fn("* starting instance...")
5652
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5653
      result.Raise("Could not start instance")
5654

    
5655
    return list(iobj.all_nodes)
5656

    
5657

    
5658
class LUConnectConsole(NoHooksLU):
5659
  """Connect to an instance's console.
5660

5661
  This is somewhat special in that it returns the command line that
5662
  you need to run on the master node in order to connect to the
5663
  console.
5664

5665
  """
5666
  _OP_REQP = ["instance_name"]
5667
  REQ_BGL = False
5668

    
5669
  def ExpandNames(self):
5670
    self._ExpandAndLockInstance()
5671

    
5672
  def CheckPrereq(self):
5673
    """Check prerequisites.
5674

5675
    This checks that the instance is in the cluster.
5676

5677
    """
5678
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5679
    assert self.instance is not None, \
5680
      "Cannot retrieve locked instance %s" % self.op.instance_name
5681
    _CheckNodeOnline(self, self.instance.primary_node)
5682

    
5683
  def Exec(self, feedback_fn):
5684
    """Connect to the console of an instance
5685

5686
    """
5687
    instance = self.instance
5688
    node = instance.primary_node
5689

    
5690
    node_insts = self.rpc.call_instance_list([node],
5691
                                             [instance.hypervisor])[node]
5692
    node_insts.Raise("Can't get node information from %s" % node)
5693

    
5694
    if instance.name not in node_insts.payload:
5695
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5696

    
5697
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5698

    
5699
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5700
    cluster = self.cfg.GetClusterInfo()
5701
    # beparams and hvparams are passed separately, to avoid editing the
5702
    # instance and then saving the defaults in the instance itself.
5703
    hvparams = cluster.FillHV(instance)
5704
    beparams = cluster.FillBE(instance)
5705
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5706

    
5707
    # build ssh cmdline
5708
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
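
# Illustration only: the argv list returned by LUConnectConsole.Exec above
# is meant to be executed on the master node.  This wrapper is an
# assumption made for the example; the real handling lives in the
# gnt-instance console client command, not in this module.
def _ExampleRunConsoleCommand(ssh_argv):
  """Replace the current process with the returned ssh command line."""
  # an interactive console needs the controlling tty, so exec the command
  # directly instead of spawning a child and capturing its output
  os.execvp(ssh_argv[0], ssh_argv)
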
5709

    
5710

    
5711
class LUReplaceDisks(LogicalUnit):
5712
  """Replace the disks of an instance.
5713

5714
  """
5715
  HPATH = "mirrors-replace"
5716
  HTYPE = constants.HTYPE_INSTANCE
5717
  _OP_REQP = ["instance_name", "mode", "disks"]
5718
  REQ_BGL = False
5719

    
5720
  def CheckArguments(self):
5721
    if not hasattr(self.op, "remote_node"):
5722
      self.op.remote_node = None
5723
    if not hasattr(self.op, "iallocator"):
5724
      self.op.iallocator = None
5725

    
5726
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
5727
                                  self.op.iallocator)
5728

    
5729
  def ExpandNames(self):
5730
    self._ExpandAndLockInstance()
5731

    
5732
    if self.op.iallocator is not None:
5733
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5734

    
5735
    elif self.op.remote_node is not None:
5736
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5737
      if remote_node is None:
5738
        raise errors.OpPrereqError("Node '%s' not known" %
5739
                                   self.op.remote_node)
5740

    
5741
      self.op.remote_node = remote_node
5742

    
5743
      # Warning: do not remove the locking of the new secondary here
5744
      # unless DRBD8.AddChildren is changed to work in parallel;
5745
      # currently it doesn't since parallel invocations of
5746
      # FindUnusedMinor will conflict
5747
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5748
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5749

    
5750
    else:
5751
      self.needed_locks[locking.LEVEL_NODE] = []
5752
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5753

    
5754
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
5755
                                   self.op.iallocator, self.op.remote_node,
5756
                                   self.op.disks)
5757

    
5758
    self.tasklets = [self.replacer]
5759

    
5760
  def DeclareLocks(self, level):
5761
    # If we're not already locking all nodes in the set we have to declare the
5762
    # instance's primary/secondary nodes.
5763
    if (level == locking.LEVEL_NODE and
5764
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5765
      self._LockInstancesNodes()
5766

    
5767
  def BuildHooksEnv(self):
5768
    """Build hooks env.
5769

5770
    This runs on the master, the primary and all the secondaries.
5771

5772
    """
5773
    instance = self.replacer.instance
5774
    env = {
5775
      "MODE": self.op.mode,
5776
      "NEW_SECONDARY": self.op.remote_node,
5777
      "OLD_SECONDARY": instance.secondary_nodes[0],
5778
      }
5779
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5780
    nl = [
5781
      self.cfg.GetMasterNode(),
5782
      instance.primary_node,
5783
      ]
5784
    if self.op.remote_node is not None:
5785
      nl.append(self.op.remote_node)
5786
    return env, nl, nl
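
# Illustration only: the replace-disks specific part of the hooks
# environment built by LUReplaceDisks.BuildHooksEnv above, for a
# change-secondary operation.  Host names are invented and the generic
# per-instance variables added by _BuildInstanceHookEnvByObject are omitted.
_EXAMPLE_REPLACE_DISKS_HOOKS_ENV = {
  "MODE": constants.REPLACE_DISK_CHG,
  "NEW_SECONDARY": "node3.example.com",
  "OLD_SECONDARY": "node2.example.com",
  }
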
5787

    
5788

    
5789
class LUEvacuateNode(LogicalUnit):
5790
  """Relocate the secondary instances from a node.
5791

5792
  """
5793
  HPATH = "node-evacuate"
5794
  HTYPE = constants.HTYPE_NODE
5795
  _OP_REQP = ["node_name"]
5796
  REQ_BGL = False
5797

    
5798
  def CheckArguments(self):
5799
    if not hasattr(self.op, "remote_node"):
5800
      self.op.remote_node = None
5801
    if not hasattr(self.op, "iallocator"):
5802
      self.op.iallocator = None
5803

    
5804
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
5805
                                  self.op.remote_node,
5806
                                  self.op.iallocator)
5807

    
5808
  def ExpandNames(self):
5809
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
5810
    if self.op.node_name is None:
5811
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
5812

    
5813
    self.needed_locks = {}
5814

    
5815
    # Declare node locks
5816
    if self.op.iallocator is not None:
5817
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5818

    
5819
    elif self.op.remote_node is not None:
5820
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5821
      if remote_node is None:
5822
        raise errors.OpPrereqError("Node '%s' not known" %
5823
                                   self.op.remote_node)
5824

    
5825
      self.op.remote_node = remote_node
5826

    
5827
      # Warning: do not remove the locking of the new secondary here
5828
      # unless DRBD8.AddChildren is changed to work in parallel;
5829
      # currently it doesn't since parallel invocations of
5830
      # FindUnusedMinor will conflict
5831
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5832
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5833

    
5834
    else:
5835
      raise errors.OpPrereqError("Invalid parameters")
5836

    
5837
    # Create tasklets for replacing disks for all secondary instances on this
5838
    # node
5839
    names = []
5840
    tasklets = []
5841

    
5842
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
5843
      logging.debug("Replacing disks for instance %s", inst.name)
5844
      names.append(inst.name)
5845

    
5846
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
5847
                                self.op.iallocator, self.op.remote_node, [])
5848
      tasklets.append(replacer)
5849

    
5850
    self.tasklets = tasklets
5851
    self.instance_names = names
5852

    
5853
    # Declare instance locks
5854
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
5855

    
5856
  def DeclareLocks(self, level):
5857
    # If we're not already locking all nodes in the set we have to declare the
5858
    # instance's primary/secondary nodes.
5859
    if (level == locking.LEVEL_NODE and
5860
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5861
      self._LockInstancesNodes()
5862

    
5863
  def BuildHooksEnv(self):
5864
    """Build hooks env.
5865

5866
    This runs on the master, the primary and all the secondaries.
5867

5868
    """
5869
    env = {
5870
      "NODE_NAME": self.op.node_name,
5871
      }
5872

    
5873
    nl = [self.cfg.GetMasterNode()]
5874

    
5875
    if self.op.remote_node is not None:
5876
      env["NEW_SECONDARY"] = self.op.remote_node
5877
      nl.append(self.op.remote_node)
5878

    
5879
    return (env, nl, nl)
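
# Illustration only: the (mode, remote_node, iallocator) combinations
# accepted by TLReplaceDisks.CheckArguments below.  REPLACE_DISK_CHG needs
# exactly one of remote_node/iallocator; the other modes accept neither.
# The node and allocator names are invented for this example.
_EXAMPLE_VALID_REPLACE_ARGUMENTS = [
  (constants.REPLACE_DISK_PRI, None, None),
  (constants.REPLACE_DISK_SEC, None, None),
  (constants.REPLACE_DISK_AUTO, None, None),
  (constants.REPLACE_DISK_CHG, "node3.example.com", None),
  (constants.REPLACE_DISK_CHG, None, "my-allocator"),
  ]
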
5880

    
5881

    
5882
class TLReplaceDisks(Tasklet):
5883
  """Replaces disks for an instance.
5884

5885
  Note: Locking is not within the scope of this class.
5886

5887
  """
5888
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
5889
               disks):
5890
    """Initializes this class.
5891

5892
    """
5893
    Tasklet.__init__(self, lu)
5894

    
5895
    # Parameters
5896
    self.instance_name = instance_name
5897
    self.mode = mode
5898
    self.iallocator_name = iallocator_name
5899
    self.remote_node = remote_node
5900
    self.disks = disks
5901

    
5902
    # Runtime data
5903
    self.instance = None
5904
    self.new_node = None
5905
    self.target_node = None
5906
    self.other_node = None
5907
    self.remote_node_info = None
5908
    self.node_secondary_ip = None
5909

    
5910
  @staticmethod
5911
  def CheckArguments(mode, remote_node, iallocator):
5912
    """Helper function for users of this class.
5913

5914
    """
5915
    # check for valid parameter combination
5916
    if mode == constants.REPLACE_DISK_CHG:
5917
      if remote_node is None and iallocator is None:
5918
        raise errors.OpPrereqError("When changing the secondary either an"
5919
                                   " iallocator script must be used or the"
5920
                                   " new node given")
5921

    
5922
      if remote_node is not None and iallocator is not None:
5923
        raise errors.OpPrereqError("Give either the iallocator or the new"
5924
                                   " secondary, not both")
5925

    
5926
    elif remote_node is not None or iallocator is not None:
5927
      # Not replacing the secondary
5928
      raise errors.OpPrereqError("The iallocator and new node options can"
5929
                                 " only be used when changing the"
5930
                                 " secondary node")
5931

    
5932
  @staticmethod
5933
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
5934
    """Compute a new secondary node using an IAllocator.
5935

5936
    """
5937
    ial = IAllocator(lu.cfg, lu.rpc,
5938
                     mode=constants.IALLOCATOR_MODE_RELOC,
5939
                     name=instance_name,
5940
                     relocate_from=relocate_from)
5941

    
5942
    ial.Run(iallocator_name)
5943

    
5944
    if not ial.success:
5945
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
5946
                                 " %s" % (iallocator_name, ial.info))
5947

    
5948
    if len(ial.nodes) != ial.required_nodes:
5949
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5950
                                 " of nodes (%s), required %s" %
5951
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))
5952

    
5953
    remote_node_name = ial.nodes[0]
5954

    
5955
    lu.LogInfo("Selected new secondary for instance '%s': %s",
5956
               instance_name, remote_node_name)
5957

    
5958
    return remote_node_name
5959

    
5960
  def _FindFaultyDisks(self, node_name):
5961
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
5962
                                    node_name, True)
5963

    
5964
  def CheckPrereq(self):
5965
    """Check prerequisites.
5966

5967
    This checks that the instance is in the cluster.
5968

5969
    """
5970
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
5971
    assert self.instance is not None, \
5972
      "Cannot retrieve locked instance %s" % self.instance_name
5973

    
5974
    if self.instance.disk_template != constants.DT_DRBD8:
5975
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
5976
                                 " instances")
5977

    
5978
    if len(self.instance.secondary_nodes) != 1:
5979
      raise errors.OpPrereqError("The instance has a strange layout,"
5980
                                 " expected one secondary but found %d" %
5981
                                 len(self.instance.secondary_nodes))
5982

    
5983
    secondary_node = self.instance.secondary_nodes[0]
5984

    
5985
    if self.iallocator_name is None:
5986
      remote_node = self.remote_node
5987
    else:
5988
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
5989
                                       self.instance.name, secondary_node)
5990

    
5991
    if remote_node is not None:
5992
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
5993
      assert self.remote_node_info is not None, \
5994
        "Cannot retrieve locked node %s" % remote_node
5995
    else:
5996
      self.remote_node_info = None
5997

    
5998
    if remote_node == self.instance.primary_node:
5999
      raise errors.OpPrereqError("The specified node is the primary node of"
6000
                                 " the instance.")
6001

    
6002
    if remote_node == secondary_node:
6003
      raise errors.OpPrereqError("The specified node is already the"
6004
                                 " secondary node of the instance.")
6005

    
6006
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6007
                                    constants.REPLACE_DISK_CHG):
6008
      raise errors.OpPrereqError("Cannot specify disks to be replaced")
6009

    
6010
    if self.mode == constants.REPLACE_DISK_AUTO:
6011
      faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
6012
      faulty_secondary = self._FindFaultyDisks(secondary_node)
6013

    
6014
      if faulty_primary and faulty_secondary:
6015
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6016
                                   " one node and can not be repaired"
6017
                                   " automatically" % self.instance_name)
6018

    
6019
      if faulty_primary:
6020
        self.disks = faulty_primary
6021
        self.target_node = self.instance.primary_node
6022
        self.other_node = secondary_node
6023
        check_nodes = [self.target_node, self.other_node]
6024
      elif faulty_secondary:
6025
        self.disks = faulty_secondary
6026
        self.target_node = secondary_node
6027
        self.other_node = self.instance.primary_node
6028
        check_nodes = [self.target_node, self.other_node]
6029
      else:
6030
        self.disks = []
6031
        check_nodes = []
6032

    
6033
    else:
6034
      # Non-automatic modes
6035
      if self.mode == constants.REPLACE_DISK_PRI:
6036
        self.target_node = self.instance.primary_node
6037
        self.other_node = secondary_node
6038
        check_nodes = [self.target_node, self.other_node]
6039

    
6040
      elif self.mode == constants.REPLACE_DISK_SEC:
6041
        self.target_node = secondary_node
6042
        self.other_node = self.instance.primary_node
6043
        check_nodes = [self.target_node, self.other_node]
6044

    
6045
      elif self.mode == constants.REPLACE_DISK_CHG:
6046
        self.new_node = remote_node
6047
        self.other_node = self.instance.primary_node
6048
        self.target_node = secondary_node
6049
        check_nodes = [self.new_node, self.other_node]
6050

    
6051
        _CheckNodeNotDrained(self.lu, remote_node)
6052

    
6053
      else:
6054
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6055
                                     self.mode)
6056

    
6057
      # If not specified all disks should be replaced
6058
      if not self.disks:
6059
        self.disks = range(len(self.instance.disks))
6060

    
6061
    for node in check_nodes:
6062
      _CheckNodeOnline(self.lu, node)
6063

    
6064
    # Check whether disks are valid
6065
    for disk_idx in self.disks:
6066
      self.instance.FindDisk(disk_idx)
6067

    
6068
    # Get secondary node IP addresses
6069
    node_2nd_ip = {}
6070

    
6071
    for node_name in [self.target_node, self.other_node, self.new_node]:
6072
      if node_name is not None:
6073
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6074

    
6075
    self.node_secondary_ip = node_2nd_ip
6076

    
6077
  def Exec(self, feedback_fn):
6078
    """Execute disk replacement.
6079

6080
    This dispatches the disk replacement to the appropriate handler.
6081

6082
    """
6083
    if not self.disks:
6084
      feedback_fn("No disks need replacement")
6085
      return
6086

    
6087
    feedback_fn("Replacing disk(s) %s for %s" %
6088
                (", ".join([str(i) for i in self.disks]), self.instance.name))
6089

    
6090
    activate_disks = (not self.instance.admin_up)
6091

    
6092
    # Activate the instance disks if we're replacing them on a down instance
6093
    if activate_disks:
6094
      _StartInstanceDisks(self.lu, self.instance, True)
6095

    
6096
    try:
6097
      # Should we replace the secondary node?
6098
      if self.new_node is not None:
6099
        return self._ExecDrbd8Secondary()
6100
      else:
6101
        return self._ExecDrbd8DiskOnly()
6102

    
6103
    finally:
6104
      # Deactivate the instance disks if we're replacing them on a down instance
6105
      if activate_disks:
6106
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6107

    
6108
  def _CheckVolumeGroup(self, nodes):
6109
    self.lu.LogInfo("Checking volume groups")
6110

    
6111
    vgname = self.cfg.GetVGName()
6112

    
6113
    # Make sure volume group exists on all involved nodes
6114
    results = self.rpc.call_vg_list(nodes)
6115
    if not results:
6116
      raise errors.OpExecError("Can't list volume groups on the nodes")
6117

    
6118
    for node in nodes:
6119
      res = results[node]
6120
      res.Raise("Error checking node %s" % node)
6121
      if vgname not in res.payload:
6122
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
6123
                                 (vgname, node))
6124

    
6125
  def _CheckDisksExistence(self, nodes):
6126
    # Check disk existence
6127
    for idx, dev in enumerate(self.instance.disks):
6128
      if idx not in self.disks:
6129
        continue
6130

    
6131
      for node in nodes:
6132
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6133
        self.cfg.SetDiskID(dev, node)
6134

    
6135
        result = self.rpc.call_blockdev_find(node, dev)
6136

    
6137
        msg = result.fail_msg
6138
        if msg or not result.payload:
6139
          if not msg:
6140
            msg = "disk not found"
6141
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6142
                                   (idx, node, msg))
6143

    
6144
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6145
    for idx, dev in enumerate(self.instance.disks):
6146
      if idx not in self.disks:
6147
        continue
6148

    
6149
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6150
                      (idx, node_name))
6151

    
6152
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6153
                                   ldisk=ldisk):
6154
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6155
                                 " replace disks for instance %s" %
6156
                                 (node_name, self.instance.name))
6157

    
6158
  def _CreateNewStorage(self, node_name):
6159
    vgname = self.cfg.GetVGName()
6160
    iv_names = {}
6161

    
6162
    for idx, dev in enumerate(self.instance.disks):
6163
      if idx not in self.disks:
6164
        continue
6165

    
6166
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6167

    
6168
      self.cfg.SetDiskID(dev, node_name)
6169

    
6170
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6171
      names = _GenerateUniqueNames(self.lu, lv_names)
6172

    
6173
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6174
                             logical_id=(vgname, names[0]))
6175
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6176
                             logical_id=(vgname, names[1]))
6177

    
6178
      new_lvs = [lv_data, lv_meta]
6179
      old_lvs = dev.children
6180
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
6181

    
6182
      # we pass force_create=True to force the LVM creation
6183
      for new_lv in new_lvs:
6184
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6185
                        _GetInstanceInfoText(self.instance), False)
6186

    
6187
    return iv_names
6188

    
6189
  def _CheckDevices(self, node_name, iv_names):
6190
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
6191
      self.cfg.SetDiskID(dev, node_name)
6192

    
6193
      result = self.rpc.call_blockdev_find(node_name, dev)
6194

    
6195
      msg = result.fail_msg
6196
      if msg or not result.payload:
6197
        if not msg:
6198
          msg = "disk not found"
6199
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
6200
                                 (name, msg))
6201

    
6202
      if result.payload.is_degraded:
6203
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6204

    
6205
  def _RemoveOldStorage(self, node_name, iv_names):
6206
    for name, (dev, old_lvs, _) in iv_names.iteritems():
6207
      self.lu.LogInfo("Remove logical volumes for %s" % name)
6208

    
6209
      for lv in old_lvs:
6210
        self.cfg.SetDiskID(lv, node_name)
6211

    
6212
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6213
        if msg:
6214
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
6215
                             hint="remove unused LVs manually")
6216

    
6217
  def _ExecDrbd8DiskOnly(self):
6218
    """Replace a disk on the primary or secondary for DRBD 8.
6219

6220
    The algorithm for replace is quite complicated:
6221

6222
      1. for each disk to be replaced:
6223

6224
        1. create new LVs on the target node with unique names
6225
        1. detach old LVs from the drbd device
6226
        1. rename old LVs to name_replaced.<time_t>
6227
        1. rename new LVs to old LVs
6228
        1. attach the new LVs (with the old names now) to the drbd device
6229

6230
      1. wait for sync across all devices
6231

6232
      1. for each modified disk:
6233

6234
        1. remove old LVs (which have the name name_replaced.<time_t>)
6235

6236
    Failures are not very well handled.
6237

6238
    """
6239
    steps_total = 6
6240

    
6241
    # Step: check device activation
6242
    self.lu.LogStep(1, steps_total, "Check device existence")
6243
    self._CheckDisksExistence([self.other_node, self.target_node])
6244
    self._CheckVolumeGroup([self.target_node, self.other_node])
6245

    
6246
    # Step: check other node consistency
6247
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6248
    self._CheckDisksConsistency(self.other_node,
6249
                                self.other_node == self.instance.primary_node,
6250
                                False)
6251

    
6252
    # Step: create new storage
6253
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6254
    iv_names = self._CreateNewStorage(self.target_node)
6255

    
6256
    # Step: for each lv, detach+rename*2+attach
6257
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6258
    for dev, old_lvs, new_lvs in iv_names.itervalues():
6259
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6260

    
6261
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
6262
      result.Raise("Can't detach drbd from local storage on node"
6263
                   " %s for device %s" % (self.target_node, dev.iv_name))
6264
      #dev.children = []
6265
      #cfg.Update(instance)
6266

    
6267
      # ok, we created the new LVs, so now we know we have the needed
6268
      # storage; as such, we proceed on the target node to rename
6269
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6270
      # using the assumption that logical_id == physical_id (which in
6271
      # turn is the unique_id on that node)
6272

    
6273
      # FIXME(iustin): use a better name for the replaced LVs
6274
      temp_suffix = int(time.time())
6275
      ren_fn = lambda d, suff: (d.physical_id[0],
6276
                                d.physical_id[1] + "_replaced-%s" % suff)
6277

    
6278
      # Build the rename list based on what LVs exist on the node
6279
      rename_old_to_new = []
6280
      for to_ren in old_lvs:
6281
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6282
        if not result.fail_msg and result.payload:
6283
          # device exists
6284
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6285

    
6286
      self.lu.LogInfo("Renaming the old LVs on the target node")
6287
      result = self.rpc.call_blockdev_rename(self.target_node, rename_old_to_new)
6288
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
6289

    
6290
      # Now we rename the new LVs to the old LVs
6291
      self.lu.LogInfo("Renaming the new LVs on the target node")
6292
      rename_new_to_old = [(new, old.physical_id)
6293
                           for old, new in zip(old_lvs, new_lvs)]
6294
      result = self.rpc.call_blockdev_rename(self.target_node, rename_new_to_old)
6295
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
6296

    
6297
      for old, new in zip(old_lvs, new_lvs):
6298
        new.logical_id = old.logical_id
6299
        self.cfg.SetDiskID(new, self.target_node)
6300

    
6301
      for disk in old_lvs:
6302
        disk.logical_id = ren_fn(disk, temp_suffix)
6303
        self.cfg.SetDiskID(disk, self.target_node)
6304

    
6305
      # Now that the new lvs have the old name, we can add them to the device
6306
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6307
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev, new_lvs)
6308
      msg = result.fail_msg
6309
      if msg:
6310
        for new_lv in new_lvs:
6311
          msg2 = self.rpc.call_blockdev_remove(self.target_node, new_lv).fail_msg
6312
          if msg2:
6313
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6314
                               hint=("cleanup manually the unused logical"
6315
                                     "volumes"))
6316
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6317

    
6318
      dev.children = new_lvs
6319

    
6320
      self.cfg.Update(self.instance)
6321

    
6322
    # Wait for sync
6323
    # This can fail as the old devices are degraded and _WaitForSync
6324
    # does a combined result over all disks, so we don't check its return value
6325
    self.lu.LogStep(5, steps_total, "Sync devices")
6326
    _WaitForSync(self.lu, self.instance, unlock=True)
6327

    
6328
    # Check all devices manually
6329
    self._CheckDevices(self.instance.primary_node, iv_names)
6330

    
6331
    # Step: remove old storage
6332
    self.lu.LogStep(6, steps_total, "Removing old storage")
6333
    self._RemoveOldStorage(self.target_node, iv_names)
6334

    
6335
  def _ExecDrbd8Secondary(self):
6336
    """Replace the secondary node for DRBD 8.
6337

6338
    The algorithm for replace is quite complicated:
6339
      - for all disks of the instance:
6340
        - create new LVs on the new node with same names
6341
        - shutdown the drbd device on the old secondary
6342
        - disconnect the drbd network on the primary
6343
        - create the drbd device on the new secondary
6344
        - network attach the drbd on the primary, using an artifice:
6345
          the drbd code for Attach() will connect to the network if it
6346
          finds a device which is connected to the good local disks but
6347
          not network enabled
6348
      - wait for sync across all devices
6349
      - remove all disks from the old secondary
6350

6351
    Failures are not very well handled.
6352

6353
    """
6354
    steps_total = 6
6355

    
6356
    # Step: check device activation
6357
    self.lu.LogStep(1, steps_total, "Check device existence")
6358
    self._CheckDisksExistence([self.instance.primary_node])
6359
    self._CheckVolumeGroup([self.instance.primary_node])
6360

    
6361
    # Step: check other node consistency
6362
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6363
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
6364

    
6365
    # Step: create new storage
6366
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6367
    for idx, dev in enumerate(self.instance.disks):
6368
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6369
                      (self.new_node, idx))
6370
      # we pass force_create=True to force LVM creation
6371
      for new_lv in dev.children:
6372
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6373
                        _GetInstanceInfoText(self.instance), False)
6374

    
6375
    # Step 4: drbd minors and drbd setup changes
6376
    # after this, we must manually remove the drbd minors on both the
6377
    # error and the success paths
6378
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6379
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
6380
                                        self.instance.name)
6381
    logging.debug("Allocated minors %r" % (minors,))
6382

    
6383
    iv_names = {}
6384
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6385
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
6386
      # create new devices on new_node; note that we create two IDs:
6387
      # one without port, so the drbd will be activated without
6388
      # networking information on the new node at this stage, and one
6389
      # with network, for the later re-attach to the network done below
6390
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
6391
      if self.instance.primary_node == o_node1:
6392
        p_minor = o_minor1
6393
      else:
6394
        p_minor = o_minor2
6395

    
6396
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
6397
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
6398

    
6399
      iv_names[idx] = (dev, dev.children, new_net_id)
6400
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6401
                    new_net_id)
6402
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6403
                              logical_id=new_alone_id,
6404
                              children=dev.children,
6405
                              size=dev.size)
6406
      try:
6407
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6408
                              _GetInstanceInfoText(self.instance), False)
6409
      except errors.GenericError:
6410
        self.cfg.ReleaseDRBDMinors(self.instance.name)
6411
        raise
6412

    
6413
    # We have new devices, shutdown the drbd on the old secondary
6414
    for idx, dev in enumerate(self.instance.disks):
6415
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
6416
      self.cfg.SetDiskID(dev, self.target_node)
6417
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
6418
      if msg:
6419
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
6420
                           "node: %s" % (idx, msg),
6421
                           hint=("Please cleanup this device manually as"
6422
                                 " soon as possible"))
6423

    
6424
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6425
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
6426
                                               self.instance.disks)\
                                               [self.instance.primary_node]
6427

    
6428
    msg = result.fail_msg
6429
    if msg:
6430
      # detaches didn't succeed (unlikely)
6431
      self.cfg.ReleaseDRBDMinors(self.instance.name)
6432
      raise errors.OpExecError("Can't detach the disks from the network on"
6433
                               " old node: %s" % (msg,))
6434

    
6435
    # if we managed to detach at least one, we update all the disks of
6436
    # the instance to point to the new secondary
6437
    self.lu.LogInfo("Updating instance configuration")
6438
    for dev, _, new_logical_id in iv_names.itervalues():
6439
      dev.logical_id = new_logical_id
6440
      self.cfg.SetDiskID(dev, self.instance.primary_node)
6441

    
6442
    self.cfg.Update(self.instance)
6443

    
6444
    # and now perform the drbd attach
6445
    self.lu.LogInfo("Attaching primary drbds to new secondary"
6446
                    " (standalone => connected)")
6447
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
6448
                                           self.instance.disks,
                                           self.instance.name,
6449
                                           False)
6450
    for to_node, to_result in result.items():
6451
      msg = to_result.fail_msg
6452
      if msg:
6453
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
6454
                           hint=("please do a gnt-instance info to see the"
6455
                                 " status of disks"))
6456

    
6457
    # Wait for sync
6458
    # This can fail as the old devices are degraded and _WaitForSync
6459
    # does a combined result over all disks, so we don't check its return value
6460
    self.lu.LogStep(5, steps_total, "Sync devices")
6461
    _WaitForSync(self.lu, self.instance, unlock=True)
6462

    
6463
    # Check all devices manually
6464
    self._CheckDevices(self.instance.primary_node, iv_names)
6465

    
6466
    # Step: remove old storage
6467
    self.lu.LogStep(6, steps_total, "Removing old storage")
6468
    self._RemoveOldStorage(self.target_node, iv_names)
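
# Illustration only: how _ExecDrbd8Secondary above rewrites a disk's DRBD8
# logical_id when the secondary moves to a new node.  The tuple layout
# (node_a, node_b, port, minor_a, minor_b, secret) follows the unpacking
# done in step 4; this helper itself is not part of the original module.
def _ExampleNewDrbdLogicalIds(old_logical_id, primary_node, new_node,
                              new_minor):
  """Return the (standalone, networked) replacement logical_ids."""
  (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = old_logical_id
  # o_node2 is the old secondary and is not reused
  if primary_node == o_node1:
    p_minor = o_minor1
  else:
    p_minor = o_minor2
  # the device is first brought up without a port (standalone) and later
  # re-attached to the network with the original port via call_drbd_attach_net
  new_alone_id = (primary_node, new_node, None, p_minor, new_minor, o_secret)
  new_net_id = (primary_node, new_node, o_port, p_minor, new_minor, o_secret)
  return (new_alone_id, new_net_id)
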
6469

    
6470

    
6471
class LURepairNodeStorage(NoHooksLU):
6472
  """Repairs the volume group on a node.
6473

6474
  """
6475
  _OP_REQP = ["node_name"]
6476
  REQ_BGL = False
6477

    
6478
  def CheckArguments(self):
6479
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
6480
    if node_name is None:
6481
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
6482

    
6483
    self.op.node_name = node_name
6484

    
6485
  def ExpandNames(self):
6486
    self.needed_locks = {
6487
      locking.LEVEL_NODE: [self.op.node_name],
6488
      }
6489

    
6490
  def _CheckFaultyDisks(self, instance, node_name):
6491
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
6492
                                node_name, True):
6493
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
6494
                                 " node '%s'" % (inst.name, node_name))
6495

    
6496
  def CheckPrereq(self):
6497
    """Check prerequisites.
6498

6499
    """
6500
    storage_type = self.op.storage_type
6501

    
6502
    if (constants.SO_FIX_CONSISTENCY not in
6503
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
6504
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
6505
                                 " repaired" % storage_type)
6506

    
6507
    # Check whether any instance on this node has faulty disks
6508
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
6509
      check_nodes = set(inst.all_nodes)
6510
      check_nodes.discard(self.op.node_name)
6511
      for inst_node_name in check_nodes:
6512
        self._CheckFaultyDisks(inst, inst_node_name)
6513

    
6514
  def Exec(self, feedback_fn):
6515
    feedback_fn("Repairing storage unit '%s' on %s ..." %
6516
                (self.op.name, self.op.node_name))
6517

    
6518
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
6519
    result = self.rpc.call_storage_execute(self.op.node_name,
6520
                                           self.op.storage_type, st_args,
6521
                                           self.op.name,
6522
                                           constants.SO_FIX_CONSISTENCY)
6523
    result.Raise("Failed to repair storage unit '%s' on %s" %
6524
                 (self.op.name, self.op.node_name))
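
# Illustration only: the capability check performed by
# LURepairNodeStorage.CheckPrereq above, extracted as a stand-alone helper
# for clarity; it is not part of the original module.
def _ExampleStorageCanBeRepaired(storage_type):
  """Return True if SO_FIX_CONSISTENCY is valid for this storage type."""
  valid_ops = constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])
  return constants.SO_FIX_CONSISTENCY in valid_ops
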
6525

    
6526

    
6527
class LUGrowDisk(LogicalUnit):
6528
  """Grow a disk of an instance.
6529

6530
  """
6531
  HPATH = "disk-grow"
6532
  HTYPE = constants.HTYPE_INSTANCE
6533
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
6534
  REQ_BGL = False
6535

    
6536
  def ExpandNames(self):
6537
    self._ExpandAndLockInstance()
6538
    self.needed_locks[locking.LEVEL_NODE] = []
6539
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6540

    
6541
  def DeclareLocks(self, level):
6542
    if level == locking.LEVEL_NODE:
6543
      self._LockInstancesNodes()
6544

    
6545
  def BuildHooksEnv(self):
6546
    """Build hooks env.
6547

6548
    This runs on the master, the primary and all the secondaries.
6549

6550
    """
6551
    env = {
6552
      "DISK": self.op.disk,
6553
      "AMOUNT": self.op.amount,
6554
      }
6555
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6556
    nl = [
6557
      self.cfg.GetMasterNode(),
6558
      self.instance.primary_node,
6559
      ]
6560
    return env, nl, nl
6561

    
6562
  def CheckPrereq(self):
6563
    """Check prerequisites.
6564

6565
    This checks that the instance is in the cluster.
6566

6567
    """
6568
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6569
    assert instance is not None, \
6570
      "Cannot retrieve locked instance %s" % self.op.instance_name
6571
    nodenames = list(instance.all_nodes)
6572
    for node in nodenames:
6573
      _CheckNodeOnline(self, node)
6574

    
6575

    
6576
    self.instance = instance
6577

    
6578
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
6579
      raise errors.OpPrereqError("Instance's disk layout does not support"
6580
                                 " growing.")
6581

    
6582
    self.disk = instance.FindDisk(self.op.disk)
6583

    
6584
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6585
                                       instance.hypervisor)
6586
    for node in nodenames:
6587
      info = nodeinfo[node]
6588
      info.Raise("Cannot get current information from node %s" % node)
6589
      vg_free = info.payload.get('vg_free', None)
6590
      if not isinstance(vg_free, int):
6591
        raise errors.OpPrereqError("Can't compute free disk space on"
6592
                                   " node %s" % node)
6593
      if self.op.amount > vg_free:
6594
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
6595
                                   " %d MiB available, %d MiB required" %
6596
                                   (node, vg_free, self.op.amount))
6597

    
6598
  def Exec(self, feedback_fn):
6599
    """Execute disk grow.
6600

6601
    """
6602
    instance = self.instance
6603
    disk = self.disk
6604
    for node in instance.all_nodes:
6605
      self.cfg.SetDiskID(disk, node)
6606
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
6607
      result.Raise("Grow request failed to node %s" % node)
6608
    disk.RecordGrow(self.op.amount)
6609
    self.cfg.Update(instance)
6610
    if self.op.wait_for_sync:
6611
      disk_abort = not _WaitForSync(self, instance)
6612
      if disk_abort:
6613
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
6614
                             " status.\nPlease check the instance.")
6615

    
6616

    
6617
class LUQueryInstanceData(NoHooksLU):
6618
  """Query runtime instance data.
6619

6620
  """
6621
  _OP_REQP = ["instances", "static"]
6622
  REQ_BGL = False
6623

    
6624
  def ExpandNames(self):
6625
    self.needed_locks = {}
6626
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6627

    
6628
    if not isinstance(self.op.instances, list):
6629
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6630

    
6631
    if self.op.instances:
6632
      self.wanted_names = []
6633
      for name in self.op.instances:
6634
        full_name = self.cfg.ExpandInstanceName(name)
6635
        if full_name is None:
6636
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6637
        self.wanted_names.append(full_name)
6638
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6639
    else:
6640
      self.wanted_names = None
6641
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6642

    
6643
    self.needed_locks[locking.LEVEL_NODE] = []
6644
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6645

    
6646
  def DeclareLocks(self, level):
6647
    if level == locking.LEVEL_NODE:
6648
      self._LockInstancesNodes()
6649

    
6650
  def CheckPrereq(self):
6651
    """Check prerequisites.
6652

6653
    This only checks the optional instance list against the existing names.
6654

6655
    """
6656
    if self.wanted_names is None:
6657
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6658

    
6659
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6660
                             in self.wanted_names]
6661
    return
6662

    
6663
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
6664
    """Returns the status of a block device
6665

6666
    """
6667
    if self.op.static or not node:
6668
      return None
6669

    
6670
    self.cfg.SetDiskID(dev, node)
6671

    
6672
    result = self.rpc.call_blockdev_find(node, dev)
6673
    if result.offline:
6674
      return None
6675

    
6676
    result.Raise("Can't compute disk status for %s" % instance_name)
6677

    
6678
    status = result.payload
6679
    if status is None:
6680
      return None
6681

    
6682
    return (status.dev_path, status.major, status.minor,
6683
            status.sync_percent, status.estimated_time,
6684
            status.is_degraded, status.ldisk_status)
6685

    
6686
  def _ComputeDiskStatus(self, instance, snode, dev):
6687
    """Compute block device status.
6688

6689
    """
6690
    if dev.dev_type in constants.LDS_DRBD:
6691
      # we change the snode then (otherwise we use the one passed in)
6692
      if dev.logical_id[0] == instance.primary_node:
6693
        snode = dev.logical_id[1]
6694
      else:
6695
        snode = dev.logical_id[0]
6696

    
6697
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
6698
                                              instance.name, dev)
6699
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
6700

    
6701
    if dev.children:
6702
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
6703
                      for child in dev.children]
6704
    else:
6705
      dev_children = []
6706

    
6707
    data = {
6708
      "iv_name": dev.iv_name,
6709
      "dev_type": dev.dev_type,
6710
      "logical_id": dev.logical_id,
6711
      "physical_id": dev.physical_id,
6712
      "pstatus": dev_pstatus,
6713
      "sstatus": dev_sstatus,
6714
      "children": dev_children,
6715
      "mode": dev.mode,
6716
      "size": dev.size,
6717
      }
6718

    
6719
    return data
6720

    
6721
  def Exec(self, feedback_fn):
6722
    """Gather and return data"""
6723
    result = {}
6724

    
6725
    cluster = self.cfg.GetClusterInfo()
6726

    
6727
    for instance in self.wanted_instances:
6728
      if not self.op.static:
6729
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6730
                                                  instance.name,
6731
                                                  instance.hypervisor)
6732
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6733
        remote_info = remote_info.payload
6734
        if remote_info and "state" in remote_info:
6735
          remote_state = "up"
6736
        else:
6737
          remote_state = "down"
6738
      else:
6739
        remote_state = None
6740
      if instance.admin_up:
6741
        config_state = "up"
6742
      else:
6743
        config_state = "down"
6744

    
6745
      disks = [self._ComputeDiskStatus(instance, None, device)
6746
               for device in instance.disks]
6747

    
6748
      idict = {
6749
        "name": instance.name,
6750
        "config_state": config_state,
6751
        "run_state": remote_state,
6752
        "pnode": instance.primary_node,
6753
        "snodes": instance.secondary_nodes,
6754
        "os": instance.os,
6755
        # this happens to be the same format used for hooks
6756
        "nics": _NICListToTuple(self, instance.nics),
6757
        "disks": disks,
6758
        "hypervisor": instance.hypervisor,
6759
        "network_port": instance.network_port,
6760
        "hv_instance": instance.hvparams,
6761
        "hv_actual": cluster.FillHV(instance),
6762
        "be_instance": instance.beparams,
6763
        "be_actual": cluster.FillBE(instance),
6764
        "serial_no": instance.serial_no,
6765
        "mtime": instance.mtime,
6766
        "ctime": instance.ctime,
6767
        }
6768

    
6769
      result[instance.name] = idict
6770

    
6771
    return result
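
# Illustration only: unpacking the per-device status tuple produced by
# _ComputeBlockdevStatus above (the entry is None in static mode or when
# the node is offline).  The summary strings are invented for this example.
def _ExampleDescribeBlockdevStatus(status):
  """Return a short human-readable summary of a blockdev status tuple."""
  if status is None:
    return "no runtime information"
  (dev_path, major, minor, sync_percent,
   estimated_time, is_degraded, ldisk_status) = status
  # estimated_time and ldisk_status are ignored in this sketch
  if is_degraded:
    return "%s (%s:%s) degraded, sync at %s%%" % (dev_path, major, minor,
                                                  sync_percent)
  return "%s (%s:%s) healthy" % (dev_path, major, minor)
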
6772

    
6773

    
6774
class LUSetInstanceParams(LogicalUnit):
6775
  """Modifies an instances's parameters.
6776

6777
  """
6778
  HPATH = "instance-modify"
6779
  HTYPE = constants.HTYPE_INSTANCE
6780
  _OP_REQP = ["instance_name"]
6781
  REQ_BGL = False
6782

    
6783
  def CheckArguments(self):
6784
    if not hasattr(self.op, 'nics'):
6785
      self.op.nics = []
6786
    if not hasattr(self.op, 'disks'):
6787
      self.op.disks = []
6788
    if not hasattr(self.op, 'beparams'):
6789
      self.op.beparams = {}
6790
    if not hasattr(self.op, 'hvparams'):
6791
      self.op.hvparams = {}
6792
    self.op.force = getattr(self.op, "force", False)
6793
    if not (self.op.nics or self.op.disks or
6794
            self.op.hvparams or self.op.beparams):
6795
      raise errors.OpPrereqError("No changes submitted")
6796

    
6797
    # Disk validation
6798
    disk_addremove = 0
6799
    for disk_op, disk_dict in self.op.disks:
6800
      if disk_op == constants.DDM_REMOVE:
6801
        disk_addremove += 1
6802
        continue
6803
      elif disk_op == constants.DDM_ADD:
6804
        disk_addremove += 1
6805
      else:
6806
        if not isinstance(disk_op, int):
6807
          raise errors.OpPrereqError("Invalid disk index")
6808
        if not isinstance(disk_dict, dict):
6809
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
6810
          raise errors.OpPrereqError(msg)
6811

    
6812
      if disk_op == constants.DDM_ADD:
6813
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
6814
        if mode not in constants.DISK_ACCESS_SET:
6815
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
6816
        size = disk_dict.get('size', None)
6817
        if size is None:
6818
          raise errors.OpPrereqError("Required disk parameter size missing")
6819
        try:
6820
          size = int(size)
6821
        except ValueError, err:
6822
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
6823
                                     str(err))
6824
        disk_dict['size'] = size
6825
      else:
6826
        # modification of disk
6827
        if 'size' in disk_dict:
6828
          raise errors.OpPrereqError("Disk size change not possible, use"
6829
                                     " grow-disk")
6830

    
6831
    if disk_addremove > 1:
6832
      raise errors.OpPrereqError("Only one disk add or remove operation"
6833
                                 " supported at a time")
6834

    
6835
    # NIC validation
6836
    nic_addremove = 0
6837
    for nic_op, nic_dict in self.op.nics:
6838
      if nic_op == constants.DDM_REMOVE:
6839
        nic_addremove += 1
6840
        continue
6841
      elif nic_op == constants.DDM_ADD:
6842
        nic_addremove += 1
6843
      else:
6844
        if not isinstance(nic_op, int):
6845
          raise errors.OpPrereqError("Invalid nic index")
6846
        if not isinstance(nic_dict, dict):
6847
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
6848
          raise errors.OpPrereqError(msg)
6849

    
6850
      # nic_dict should be a dict
6851
      nic_ip = nic_dict.get('ip', None)
6852
      if nic_ip is not None:
6853
        if nic_ip.lower() == constants.VALUE_NONE:
6854
          nic_dict['ip'] = None
6855
        else:
6856
          if not utils.IsValidIP(nic_ip):
6857
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
6858

    
6859
      nic_bridge = nic_dict.get('bridge', None)
6860
      nic_link = nic_dict.get('link', None)
6861
      if nic_bridge and nic_link:
6862
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6863
                                   " at the same time")
6864
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
6865
        nic_dict['bridge'] = None
6866
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
6867
        nic_dict['link'] = None
6868

    
6869
      if nic_op == constants.DDM_ADD:
6870
        nic_mac = nic_dict.get('mac', None)
6871
        if nic_mac is None:
6872
          nic_dict['mac'] = constants.VALUE_AUTO
6873

    
6874
      if 'mac' in nic_dict:
6875
        nic_mac = nic_dict['mac']
6876
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6877
          if not utils.IsValidMac(nic_mac):
6878
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
6879
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
6880
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
6881
                                     " modifying an existing nic")
6882

    
6883
    if nic_addremove > 1:
6884
      raise errors.OpPrereqError("Only one NIC add or remove operation"
6885
                                 " supported at a time")
6886

    
6887
  def ExpandNames(self):
6888
    self._ExpandAndLockInstance()
6889
    self.needed_locks[locking.LEVEL_NODE] = []
6890
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6891

    
6892
  def DeclareLocks(self, level):
6893
    if level == locking.LEVEL_NODE:
6894
      self._LockInstancesNodes()
6895

    
6896
  def BuildHooksEnv(self):
6897
    """Build hooks env.
6898

6899
    This runs on the master, primary and secondaries.
6900

6901
    """
6902
    args = dict()
6903
    if constants.BE_MEMORY in self.be_new:
6904
      args['memory'] = self.be_new[constants.BE_MEMORY]
6905
    if constants.BE_VCPUS in self.be_new:
6906
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
6907
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
6908
    # information at all.
6909
    if self.op.nics:
6910
      args['nics'] = []
6911
      nic_override = dict(self.op.nics)
6912
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
6913
      for idx, nic in enumerate(self.instance.nics):
6914
        if idx in nic_override:
6915
          this_nic_override = nic_override[idx]
6916
        else:
6917
          this_nic_override = {}
6918
        if 'ip' in this_nic_override:
6919
          ip = this_nic_override['ip']
6920
        else:
6921
          ip = nic.ip
6922
        if 'mac' in this_nic_override:
6923
          mac = this_nic_override['mac']
6924
        else:
6925
          mac = nic.mac
6926
        if idx in self.nic_pnew:
6927
          nicparams = self.nic_pnew[idx]
6928
        else:
6929
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
6930
        mode = nicparams[constants.NIC_MODE]
6931
        link = nicparams[constants.NIC_LINK]
6932
        args['nics'].append((ip, mac, mode, link))
6933
      if constants.DDM_ADD in nic_override:
6934
        ip = nic_override[constants.DDM_ADD].get('ip', None)
6935
        mac = nic_override[constants.DDM_ADD]['mac']
6936
        nicparams = self.nic_pnew[constants.DDM_ADD]
6937
        mode = nicparams[constants.NIC_MODE]
6938
        link = nicparams[constants.NIC_LINK]
6939
        args['nics'].append((ip, mac, mode, link))
6940
      elif constants.DDM_REMOVE in nic_override:
6941
        del args['nics'][-1]
6942

    
6943
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
6944
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6945
    return env, nl, nl
6946

    
6947
  def _GetUpdatedParams(self, old_params, update_dict,
6948
                        default_values, parameter_types):
6949
    """Return the new params dict for the given params.
6950

6951
    @type old_params: dict
6952
    @param old_params: old parameters
6953
    @type update_dict: dict
6954
    @param update_dict: dict containing new parameter values,
6955
                        or constants.VALUE_DEFAULT to reset the
6956
                        parameter to its default value
6957
    @type default_values: dict
6958
    @param default_values: default values for the filled parameters
6959
    @type parameter_types: dict
6960
    @param parameter_types: dict mapping target dict keys to types
6961
                            in constants.ENFORCEABLE_TYPES
6962
    @rtype: (dict, dict)
6963
    @return: (new_parameters, filled_parameters)
6964

6965
    """
6966
    params_copy = copy.deepcopy(old_params)
6967
    for key, val in update_dict.iteritems():
6968
      if val == constants.VALUE_DEFAULT:
6969
        try:
6970
          del params_copy[key]
6971
        except KeyError:
6972
          pass
6973
      else:
6974
        params_copy[key] = val
6975
    utils.ForceDictType(params_copy, parameter_types)
6976
    params_filled = objects.FillDict(default_values, params_copy)
6977
    return (params_copy, params_filled)
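
  # Worked example (illustration only): with old_params={"memory": 512},
  # update_dict={"memory": constants.VALUE_DEFAULT, "vcpus": 2},
  # default_values={"memory": 128, "vcpus": 1} and a parameter_types dict
  # that accepts these keys, the method returns
  # ({"vcpus": 2}, {"memory": 128, "vcpus": 2}).
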
6978

    
6979
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode,  msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                        instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
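        # Illustrative arithmetic for the check below (hypothetical numbers):
        # raising BE_MEMORY to 2048 while the instance currently uses 512 and
        # the primary node reports 1024 MB free gives
        # miss_mem = 2048 - 512 - 1024 = 512 > 0, so the change is refused.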
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
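      # Each entry pairs an operation with a parameter dict: constants.DDM_ADD,
      # constants.DDM_REMOVE, or an integer index selecting an existing NIC;
      # the dict may carry 'mac', 'ip', 'bridge' and the keys listed in
      # constants.NICS_PARAMETERS (descriptive summary of the loop below).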
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics)))
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic')
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None')
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC()
        else:
          # or validate/reserve the current one
          if self.cfg.IsMacInUse(nic_mac):
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances")
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance")
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg))
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)))

    return

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
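    # result collects (parameter, new value) pairs that are reported back to
    # the caller, e.g. ("disk/1", "add:size=1024,mode=rw") or ("be/memory",
    # 512) -- the concrete values here are only illustrative.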
    instance = self.instance
    cluster = self.cluster
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pnew:
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance)

    return result


class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

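    # Nodes that could not be queried are marked with False instead of a
    # list; an illustrative result: {"node1.example.com": ["inst1"],
    # "node2.example.com": False} (names are examples only).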
    return result


class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is a wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    # per-disk results
    dresults = []
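    # dresults gets one boolean per disk: True if the snapshot was exported to
    # the target node, False otherwise, e.g. [True, False] for a two-disk
    # instance whose second disk failed (illustrative values).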
    try:
      for idx, disk in enumerate(instance.disks):
        # result.payload will be a snapshot of an lvm leaf of the one we passed
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                          idx, src_node, msg)
          snap_disks.append(False)
        else:
          disk_id = (vgname, result.payload)
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=disk_id, physical_id=disk_id,
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      if self.op.shutdown and instance.admin_up:
        result = self.rpc.call_instance_start(src_node, instance, None, None)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
          dresults.append(False)
        else:
          dresults.append(True)
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)
      else:
        dresults.append(False)

    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    fin_resu = True
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)
      fin_resu = False

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal; if we
    # proceeded, the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults


class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
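    # results ends up as a list of (path, tag) pairs, e.g.
    # [("/instances/inst1.example.com", "web")] -- illustrative names only.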
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
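    # Sketch of the structure built here: top-level keys "version",
    # "cluster_name", "cluster_tags", "enabled_hypervisors", "nodes" and
    # "instances"; the mode-specific "request" key is added afterwards by
    # _AddNewInstance or _AddRelocateInstance.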
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")

    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and, if successful, save the result in
    self.out_data and the other parameters.

    """
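    # A well-formed reply is a dict with at least the keys checked below,
    # e.g. (illustrative): {"success": True, "info": "allocation successful",
    # "nodes": ["node2.example.com"]}; anything else is rejected.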
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests.

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result