#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as a purely lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged, but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


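# Illustrative sketch (hypothetical class name, not from the original code):
# a concurrent LU (REQ_BGL = False) typically wires ExpandNames, DeclareLocks
# and _LockInstancesNodes together like this:
#
#   class LUHypotheticalInstanceOp(LogicalUnit):
#     REQ_BGL = False
#
#     def ExpandNames(self):
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()

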
class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


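# Illustrative sketch (hypothetical names, not from the original code): a
# minimal tasklet only implements CheckPrereq and Exec; the owning LU creates
# it in ExpandNames and stores it in self.tasklets, after which the default
# LogicalUnit.CheckPrereq/Exec delegate to it.
#
#   class _HypotheticalNoopTasklet(Tasklet):
#     def CheckPrereq(self):
#       pass
#
#     def Exec(self, feedback_fn):
#       feedback_fn("nothing to do")

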
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


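# Illustrative sketch (hypothetical, made-up values, not from the original
# code): for an instance with one bridged NIC and one disk,
# _BuildInstanceHookEnv returns a dict roughly like the following; the hooks
# runner later adds the GANETI_ prefix to every key.
#
#   {
#     "OP_TARGET": "inst1.example.com",
#     "INSTANCE_NAME": "inst1.example.com",
#     "INSTANCE_PRIMARY": "node1.example.com",
#     "INSTANCE_SECONDARIES": "node2.example.com",
#     "INSTANCE_STATUS": "up",
#     "INSTANCE_NIC_COUNT": 1,
#     "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#     "INSTANCE_NIC0_BRIDGE": "xen-br0",
#     "INSTANCE_DISK_COUNT": 1,
#     "INSTANCE_DISK0_SIZE": 10240,
#     ...
#   }

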
def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(NoHooksLU):
  """Logical unit for destroying the cluster.

  """
  _OP_REQP = []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates (and the file is outdated)" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are run only in the post phase; a failing hook gets
    its output logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various tests on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      if msg:
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
        bad = True
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        if instance not in instanceinfo:
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
                      instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      result = self._VerifyNode(node_i, file_names, local_checksums,
                                nresult, feedback_fn, master_files,
                                node_drbd, vg_name)
      bad = bad or result

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, utils.SafeEncode(lvdata)))
        bad = True
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      if not isinstance(idata, list):
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
                    (node,))
        bad = True
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          if (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST]):
            feedback_fn("  - ERROR: node %s didn't return data for the"
                        " volume group '%s' - it is either missing or broken" %
                        (node, vg_name))
            bad = True
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
                    " from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      result =  self._VerifyInstance(instance, inst_config, node_volume,
                                     node_instance, feedback_fn, n_offline)
      bad = bad or result
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      elif pnode not in n_offline:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        elif snode not in n_offline:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))
          bad = True
        if snode in n_offline:
          inst_nodes_offline.append(snode)

      if inst_nodes_offline:
        # warn that the instance lives on offline nodes, and set bad=True
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
                    ", ".join(inst_nodes_offline))
        bad = True

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          msg = res.fail_msg
          if msg:
            if res.offline:
              # no need to warn or set fail return value
              continue
            feedback_fn("    Communication failure in hooks execution: %s" %
                        msg)
            lu_result = 1
            continue
          for script, hkr, output in res.payload:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result


class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def CheckPrereq(self):
    """Check prerequisites.

    This has no prerequisites.

    """
    pass

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    result = res_nodes, res_instances, res_missing = {}, [], {}

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(name)
                 for name in self.cfg.GetInstanceList()]

    nv_dict = {}
    for inst in instances:
      inst_lvs = {}
      if (not inst.admin_up or
          inst.disk_template not in constants.DTS_NET_MIRROR):
        continue
      inst.MapLVsByNode(inst_lvs)
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
      for node, vol_list in inst_lvs.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      # node_volume
      node_res = node_lvs[node]
      if node_res.offline:
        continue
      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      lvs = node_res.payload
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
        inst = nv_dict.pop((node, lv_name), None)
        if (not lv_online and inst is not None
            and inst.name not in res_instances):
          res_instances.append(inst.name)

    # any leftover items in nv_dict are missing LVs, let's arrange the
    # data better
    for key, inst in nv_dict.iteritems():
      if inst.name not in res_missing:
        res_missing[inst.name] = []
      res_missing[inst.name].append(key)

    return result


1522
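# Illustrative sketch, not part of the original module and not called by any
# LU: it mirrors the inversion performed in LUVerifyDisks.Exec above, where the
# per-instance {node: [volume, ...]} map is flattened into a
# {(node, volume): instance} lookup so LVs reported by the nodes can be matched
# back to their owning instance.  The helper name and sample data are
# hypothetical.
def _ExampleInvertLvMap():
  inst_lvs = {"inst1.example.com": {"node1": ["xenvg/disk0", "xenvg/disk1"],
                                    "node2": ["xenvg/disk0"]}}
  nv_dict = {}
  for iname, node_map in inst_lvs.items():
    for node, vol_list in node_map.items():
      for vol in vol_list:
        nv_dict[(node, vol)] = iname
  return nv_dict

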
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  _OP_REQP = ["instances"]
  REQ_BGL = False

  def ExpandNames(self):

    if not isinstance(self.op.instances, list):
      raise errors.OpPrereqError("Invalid argument type 'instances'")

    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = self.cfg.ExpandInstanceName(name)
        if full_name is None:
          raise errors.OpPrereqError("Instance '%s' not known" % name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
      if result.failed:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      if len(result.data) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.data):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance)
          changed.append((instance.name, idx, size))
    return changed


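# Illustrative sketch, not part of the original module and not called by any
# LU: it isolates the unit handling used by LURepairDiskSizes.Exec above --
# node daemons report block device sizes in bytes, while the configuration
# stores disk sizes in mebibytes, hence the "size >> 20" conversion.  The
# helper name is hypothetical.
def _ExampleBytesToMebibytes(size_in_bytes):
  """Convert a byte count to whole mebibytes, rounding down."""
  return size_in_bytes >> 20  # same as size_in_bytes // (1024 * 1024)

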
class LURenameCluster(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    """
    hostname = utils.HostInfo(self.op.name)

    new_name = hostname.name
    self.ip = new_ip = hostname.ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()
    if new_name == old_name and new_ip == old_ip:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed")
    if new_ip != old_ip:
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                   " reachable on the network. Aborting." %
                                   new_ip)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    """
    clustername = self.op.name
    ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = clustername
      cluster.master_ip = ip
      self.cfg.Update(cluster)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      node_list = self.cfg.GetNodeList()
      try:
        node_list.remove(master)
      except ValueError:
        pass
      result = self.rpc.call_upload_file(node_list,
                                         constants.SSH_KNOWN_HOSTS_FILE)
      for to_node, to_result in result.iteritems():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
          self.proc.LogWarning(msg)

    finally:
      result = self.rpc.call_node_start_master(master, False, False)
      msg = result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)


def _RecursiveCheckIfLVMBased(disk):
  """Check if the given disk or its children are lvm-based.

  @type disk: L{objects.Disk}
  @param disk: the disk to check
  @rtype: boolean
  @return: boolean indicating whether a LD_LV dev_type was found or not

  """
  if disk.children:
    for chdisk in disk.children:
      if _RecursiveCheckIfLVMBased(chdisk):
        return True
  return disk.dev_type == constants.LD_LV


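# Illustrative sketch, not part of the original module and not called by any
# LU: it shows the kind of nested disk tree that _RecursiveCheckIfLVMBased
# above walks -- a mirrored device whose children are logical volumes is
# reported as lvm-based.  The stand-in class is hypothetical and is used
# instead of objects.Disk purely to keep the example self-contained;
# constants.LD_DRBD8 is assumed to be the mirrored logical-disk type.
def _ExampleLVMCheck():
  """Run _RecursiveCheckIfLVMBased on a hand-built two-level disk tree."""
  class _FakeDisk(object):
    def __init__(self, dev_type, children=None):
      self.dev_type = dev_type
      self.children = children or []

  lv_child = _FakeDisk(constants.LD_LV)
  mirrored = _FakeDisk(constants.LD_DRBD8, children=[lv_child, lv_child])
  return _RecursiveCheckIfLVMBased(mirrored)  # True: LV children found

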
class LUSetClusterParams(LogicalUnit):
  """Change the parameters of the cluster.

  """
  HPATH = "cluster-modify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []
  REQ_BGL = False

  def CheckArguments(self):
    """Check parameters

    """
    if not hasattr(self.op, "candidate_pool_size"):
      self.op.candidate_pool_size = None
    if self.op.candidate_pool_size is not None:
      try:
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
      except (ValueError, TypeError), err:
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
                                   str(err))
      if self.op.candidate_pool_size < 1:
        raise errors.OpPrereqError("At least one master candidate needed")

  def ExpandNames(self):
    # FIXME: in the future maybe other cluster params won't require checking on
    # all nodes to be modified.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_VG_NAME": self.op.vg_name,
      }
    mn = self.cfg.GetMasterNode()
    return env, [mn], [mn]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the given params don't conflict and
    if the given volume group is valid.

    """
    if self.op.vg_name is not None and not self.op.vg_name:
      instances = self.cfg.GetAllInstancesInfo().values()
      for inst in instances:
        for disk in inst.disks:
          if _RecursiveCheckIfLVMBased(disk):
            raise errors.OpPrereqError("Cannot disable lvm storage while"
                                       " lvm-based instances exist")

    node_list = self.acquired_locks[locking.LEVEL_NODE]

    # if vg_name not None, checks given volume group on all nodes
    if self.op.vg_name:
      vglist = self.rpc.call_vg_list(node_list)
      for node in node_list:
        msg = vglist[node].fail_msg
        if msg:
          # ignoring down node
          self.LogWarning("Error while gathering data on node %s"
                          " (ignoring node): %s", node, msg)
          continue
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
                                              self.op.vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          raise errors.OpPrereqError("Error on node '%s': %s" %
                                     (node, vgstatus))

    self.cluster = cluster = self.cfg.GetClusterInfo()
    # validate params changes
    if self.op.beparams:
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
      self.new_beparams = objects.FillDict(
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)

    if self.op.nicparams:
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
      self.new_nicparams = objects.FillDict(
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
      objects.NIC.CheckParameterSyntax(self.new_nicparams)

    # hypervisor list/parameters
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
    if self.op.hvparams:
      if not isinstance(self.op.hvparams, dict):
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
      for hv_name, hv_dict in self.op.hvparams.items():
        if hv_name not in self.new_hvparams:
          self.new_hvparams[hv_name] = hv_dict
        else:
          self.new_hvparams[hv_name].update(hv_dict)

    if self.op.enabled_hypervisors is not None:
      self.hv_list = self.op.enabled_hypervisors
      if not self.hv_list:
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
                                   " least one member")
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
      if invalid_hvs:
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
                                   " entries: %s" %
                                   utils.CommaJoin(invalid_hvs))
    else:
      self.hv_list = cluster.enabled_hypervisors

    if self.op.hvparams or self.op.enabled_hypervisors is not None:
      # either the enabled list has changed, or the parameters have, validate
      for hv_name, hv_params in self.new_hvparams.items():
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
            (self.op.enabled_hypervisors and
             hv_name in self.op.enabled_hypervisors)):
          # either this is a new hypervisor, or its parameters have changed
          hv_class = hypervisor.GetHypervisor(hv_name)
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
          hv_class.CheckParameterSyntax(hv_params)
          _CheckHVParams(self, node_list, hv_name, hv_params)

  def Exec(self, feedback_fn):
    """Change the parameters of the cluster.

    """
    if self.op.vg_name is not None:
      new_volume = self.op.vg_name
      if not new_volume:
        new_volume = None
      if new_volume != self.cfg.GetVGName():
        self.cfg.SetVGName(new_volume)
      else:
        feedback_fn("Cluster LVM configuration already in desired"
                    " state, not changing")
    if self.op.hvparams:
      self.cluster.hvparams = self.new_hvparams
    if self.op.enabled_hypervisors is not None:
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
    if self.op.beparams:
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
    if self.op.nicparams:
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams

    if self.op.candidate_pool_size is not None:
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
      # we need to update the pool size here, otherwise the save will fail
      _AdjustCandidatePool(self)

    self.cfg.Update(self.cluster)


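# Illustrative sketch, not part of the original module and not called by any
# LU: LUSetClusterParams.CheckPrereq above merges the submitted parameter
# overrides on top of the cluster-wide defaults via objects.FillDict.  The
# hypothetical helper below shows the assumed merge semantics with plain
# dicts: the defaults are copied first, then the overrides win.
def _ExampleFillDefaults(defaults, overrides):
  """Return a copy of C{defaults} updated with C{overrides}."""
  merged = defaults.copy()
  merged.update(overrides)
  return merged

# Example: merging new backend parameters over the cluster defaults,
# _ExampleFillDefaults({"memory": 128, "vcpus": 1}, {"memory": 512})
# gives {"memory": 512, "vcpus": 1}.

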
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to

  """
  # 1. Gather target nodes
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  dist_nodes = lu.cfg.GetNodeList()
  if additional_nodes is not None:
    dist_nodes.extend(additional_nodes)
  if myself.name in dist_nodes:
    dist_nodes.remove(myself.name)
  # 2. Gather files to distribute
  dist_files = set([constants.ETC_HOSTS,
                    constants.SSH_KNOWN_HOSTS_FILE,
                    constants.RAPI_CERT_FILE,
                    constants.RAPI_USERS_FILE,
                    constants.HMAC_CLUSTER_KEY,
                   ])

  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
  for hv_name in enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    dist_files.update(hv_class.GetAncillaryFiles())

  # 3. Perform the files upload
  for fname in dist_files:
    if os.path.exists(fname):
      result = lu.rpc.call_upload_file(dist_nodes, fname)
      for to_node, to_result in result.items():
        msg = to_result.fail_msg
        if msg:
          msg = ("Copy of file %s to node %s failed: %s" %
                 (fname, to_node, msg))
          lu.proc.LogWarning(msg)


class LURedistributeConfig(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU.

  """
  _OP_REQP = []
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks[locking.LEVEL_NODE] = 1

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    """
    self.cfg.Update(self.cfg.GetClusterInfo())
    _RedistributeAncillaryFiles(self)


def _WaitForSync(lu, instance, oneshot=False, unlock=False):
  """Sleep and poll for an instance's disk to sync.

  """
  if not instance.disks:
    return True

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in instance.disks:
    lu.cfg.SetDiskID(dev, node)

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                      node, instance.disks[i].iv_name)
        continue

      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
          max_time = mstat.estimated_time
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (instance.disks[i].iv_name, mstat.sync_percent,
                         rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded


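# Illustrative sketch, not part of the original module and not called by any
# LU: it condenses the control flow of _WaitForSync above -- poll a status
# callback, keep waiting while any device is still syncing, and when
# everything looks done but degraded, re-check a few times before giving up.
# The helper and its get_status_fn callback are hypothetical; get_status_fn
# must return a (done, degraded) tuple.
def _ExampleWaitUntilSynced(get_status_fn, sleep_fn=time.sleep):
  """Poll C{get_status_fn} until it reports a clean, non-degraded state."""
  degr_retries = 10  # extra confirmation polls for a degraded-but-done state
  while True:
    done, degraded = get_status_fn()
    if done and degraded and degr_retries > 0:
      degr_retries -= 1
      sleep_fn(1)
      continue
    if done:
      return not degraded
    sleep_fn(6)

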
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).

  """
  lu.cfg.SetDiskID(dev, node)

  result = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
      result = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      result = False
    else:
      if ldisk:
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
      else:
        result = result and not rstats.payload.is_degraded

  if dev.children:
    for child in dev.children:
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)

  return result


class LUDiagnoseOS(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  _OP_REQP = ["output_fields", "names"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet()
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")

  def ExpandNames(self):
    if self.op.names:
      raise errors.OpPrereqError("Selective OS query not supported")

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    # Lock all nodes, in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    self.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    """

  @staticmethod
  def _DiagnoseByOS(node_list, rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param node_list: a list with the names of all nodes
    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another map, with
        nodes as keys and tuples of (path, status, diagnose) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
                                     (/srv/..., False, "invalid api")],
                           "node2": [(/srv/..., True, "")]}
          }

    """
    all_os = {}
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for node_name in rlist
                  if not rlist[node_name].fail_msg]
    for node_name, nr in rlist.items():
      if nr.fail_msg or not nr.payload:
        continue
      for name, path, status, diagnose in nr.payload:
        if name not in all_os:
          # build a list of nodes for this os containing empty lists
          # for each node in node_list
          all_os[name] = {}
          for nname in good_nodes:
            all_os[name][nname] = []
        all_os[name][node_name].append((path, status, diagnose))
    return all_os

  def Exec(self, feedback_fn):
    """Compute the list of OSes.

    """
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
    node_data = self.rpc.call_os_diagnose(valid_nodes)
    pol = self._DiagnoseByOS(valid_nodes, node_data)
    output = []
    for os_name, os_data in pol.items():
      row = []
      for field in self.op.output_fields:
        if field == "name":
          val = os_name
        elif field == "valid":
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
        elif field == "node_status":
          # this is just a copy of the dict
          val = {}
          for node_name, nos_list in os_data.items():
            val[node_name] = nos_list
        else:
          raise errors.ParameterError(field)
        row.append(val)
      output.append(row)

    return output


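# Illustrative sketch, not part of the original module and not called by any
# LU: it mirrors the pivot performed by LUDiagnoseOS._DiagnoseByOS above using
# plain dictionaries -- per-node lists of (name, path, status, diagnose)
# entries become a per-OS mapping of node -> [(path, status, diagnose)].
# The helper name and the inline sample data are hypothetical.
def _ExamplePivotOsList():
  per_node = {
    "node1": [("debian-etch", "/usr/lib/os", True, "")],
    "node2": [("debian-etch", "/srv/os", False, "invalid api")],
    }
  per_os = {}
  for node_name, entries in per_node.items():
    for name, path, status, diagnose in entries:
      per_os.setdefault(name, {}).setdefault(node_name, []).append(
        (path, status, diagnose))
  return per_os

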
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      }
    all_nodes = self.cfg.GetNodeList()
    if self.op.node_name in all_nodes:
      all_nodes.remove(self.op.node_name)
    return env, all_nodes, all_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
    if node is None:
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)

    instance_list = self.cfg.GetInstanceList()

    masternode = self.cfg.GetMasterNode()
    if node.name == masternode:
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.")

    for instance_name in instance_list:
      instance = self.cfg.GetInstanceInfo(instance_name)
      if node.name in instance.all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % instance_name)
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
    finally:
      res = h_results[node.name]
      if res.fail_msg:
        if not res.offline:
          self.LogError("Failed to start hooks on %s: %s" %
                        (node.name, res.fail_msg))
      for script, hkr, output in res.payload:
        if hkr != constants.HKR_FAIL:
          continue
        if output:
          self.LogWarning("On %s script %s failed, output:  %s" %
                          (node.name, script, output))
        else:
          self.LogWarning("On %s script %s failed (no output)." %
                          (node.name, script))

    result = self.rpc.call_node_leave_cluster(node.name)
    msg = result.fail_msg
    if msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", msg)

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self)


class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  _OP_REQP = ["output_fields", "names", "use_locking"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(
    "name", "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "serial_no", "ctime", "mtime",
    "master_candidate",
    "master",
    "offline",
    "drained",
    "role",
    )

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def CheckPrereq(self):
    """Check prerequisites.

    """
    # The validation of the node list is done in the _GetWantedNodes,
    # if non empty, and if empty, there's no validation to do
    pass

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted != locking.ALL_SET:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)
    else:
      nodenames = all_info.keys()

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      live_data = {}
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      for name in nodenames:
        nodeinfo = node_data[name]
        if not nodeinfo.fail_msg and nodeinfo.payload:
          nodeinfo = nodeinfo.payload
          fn = utils.TryConvert
          live_data[name] = {
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
            "bootid": nodeinfo.get('bootid', None),
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
            }
        else:
          live_data[name] = {}
    else:
      live_data = dict.fromkeys(nodenames, {})

    node_to_primary = dict([(name, set()) for name in nodenames])
    node_to_secondary = dict([(name, set()) for name in nodenames])

    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields & frozenset(self.op.output_fields):
      instancelist = self.cfg.GetInstanceList()

      for instance_name in instancelist:
        inst = self.cfg.GetInstanceInfo(instance_name)
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      node_output = []
      for field in self.op.output_fields:
        if field == "name":
          val = node.name
        elif field == "pinst_list":
          val = list(node_to_primary[node.name])
        elif field == "sinst_list":
          val = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          val = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          val = len(node_to_secondary[node.name])
        elif field == "pip":
          val = node.primary_ip
        elif field == "sip":
          val = node.secondary_ip
        elif field == "tags":
          val = list(node.GetTags())
        elif field == "serial_no":
          val = node.serial_no
        elif field == "ctime":
          val = node.ctime
        elif field == "mtime":
          val = node.mtime
        elif field == "master_candidate":
          val = node.master_candidate
        elif field == "master":
          val = node.name == master_node
        elif field == "offline":
          val = node.offline
        elif field == "drained":
          val = node.drained
        elif self._FIELDS_DYNAMIC.Matches(field):
          val = live_data[node.name].get(field, None)
        elif field == "role":
          if node.name == master_node:
            val = "M"
          elif node.master_candidate:
            val = "C"
          elif node.drained:
            val = "D"
          elif node.offline:
            val = "O"
          else:
            val = "R"
        else:
          raise errors.ParameterError(field)
        node_output.append(val)
      output.append(node_output)

    return output


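# Illustrative sketch, not part of the original module and not called by any
# LU: it restates the precedence used for the "role" output field in
# LUQueryNodes.Exec above -- master wins over candidate, which wins over
# drained, which wins over offline; everything else is a regular node.
# The helper name and its boolean arguments are hypothetical.
def _ExampleNodeRole(is_master, is_candidate, is_drained, is_offline):
  """Return the single-letter role used by the node list output."""
  if is_master:
    return "M"
  elif is_candidate:
    return "C"
  elif is_drained:
    return "D"
  elif is_offline:
    return "O"
  else:
    return "R"

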
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_REQP = ["nodes", "output_fields"]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    nodenames = self.nodes
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      node_vols = nresult.payload[:]
      node_vols.sort(key=lambda vol: vol['dev'])

      for vol in node_vols:
        node_output = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            for inst in ilist:
              if node not in lv_by_node[inst]:
                continue
              if vol['name'] in lv_by_node[inst][node]:
                val = inst.name
                break
            else:
              val = '-'
          else:
            raise errors.ParameterError(field)
          node_output.append(str(val))

        output.append(node_output)

    return output


class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
  REQ_BGL = False
  _FIELDS_STATIC = utils.FieldSet("node")

  def ExpandNames(self):
    storage_type = self.op.storage_type

    if storage_type not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)

    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]

    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*dynamic_fields),
                       selected=self.op.output_fields)

    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the fields required are valid output fields.

    """
    self.op.name = getattr(self.op, "name", None)

    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    """
    # Always get name to sort by
    if constants.SF_NAME in self.op.output_fields:
      fields = self.op.output_fields[:]
    else:
      fields = [constants.SF_NAME] + self.op.output_fields

    # Never ask for node as it's only known to the LU
    while "node" in fields:
      fields.remove("node")

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      rows = dict([(row[name_idx], row) for row in nresult.payload])

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]

        out = []

        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result


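# Illustrative sketch, not part of the original module and not called by any
# LU: it shows the column-index trick used by LUQueryNodeStorage.Exec above --
# the storage name is always requested so rows can be sorted by it, and a
# field -> index map is used to pull the other requested columns out of each
# row.  The helper name, field list and sample rows are hypothetical.
def _ExampleStorageColumns():
  fields = ["name", "size", "used"]
  field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
  rows = [["xenvg", 102400, 2048], ["datavg", 51200, 512]]
  by_name = dict([(row[field_idx["name"]], row) for row in rows])
  # pick the "size" column of a given storage unit
  return by_name["xenvg"][field_idx["size"]]  # -> 102400

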
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
  REQ_BGL = False

  def CheckArguments(self):
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if node_name is None:
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)

    self.op.node_name = node_name

    storage_type = self.op.storage_type
    if storage_type not in constants.VALID_STORAGE_FIELDS:
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    """
    storage_type = self.op.storage_type

    try:
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type)

    diff = set(self.op.changes.keys()) - modifiable
    if diff:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(diff)))

  def Exec(self, feedback_fn):
    """Modifies a storage unit on the node.

    """
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    result = self.rpc.call_storage_modify(self.op.node_name,
                                          self.op.storage_type, st_args,
                                          self.op.name, self.op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (self.op.name, self.op.node_name))


class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name"]

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      }
    nodes_0 = self.cfg.GetNodeList()
    nodes_1 = nodes_0 + [self.op.node_name, ]
    return env, nodes_0, nodes_1

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node_name = self.op.node_name
    cfg = self.cfg

    dns_data = utils.HostInfo(node_name)

    node = dns_data.name
    primary_ip = self.op.primary_ip = dns_data.ip
    secondary_ip = getattr(self.op, "secondary_ip", None)
    if secondary_ip is None:
      secondary_ip = primary_ip
    if not utils.IsValidIP(secondary_ip):
      raise errors.OpPrereqError("Invalid secondary IP given")
    self.op.secondary_ip = secondary_ip

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        if (existing_node.primary_ip != primary_ip or
            existing_node.secondary_ip != secondary_ip):
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before")
        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no private ip but the"
                                   " new node has one")
      else:
        raise errors.OpPrereqError("The master has a private ip but the"
                                   " new node doesn't have one")

    # checks reachability
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping")

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to noded port")

    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
    # the new node will increase mc_max with one, so:
    mc_max = min(mc_max + 1, cp_size)
    self.master_candidate = mc_now < mc_max

    if self.op.readd:
      self.new_node = self.cfg.GetNodeInfo(node)
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
    else:
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # setup ssh on node
    logging.info("Copy ssh key to node %s", node)
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    keyarray = []
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
                priv_key, pub_key]

    for i in keyfiles:
      f = open(i, 'r')
      try:
        keyarray.append(f.read())
      finally:
        f.close()

    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
                                    keyarray[2],
                                    keyarray[3], keyarray[4], keyarray[5])
    result.Raise("Cannot transfer ssh keys to the new node")

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      utils.AddHostToEtcHosts(new_node.name)

    if new_node.secondary_ip != new_node.primary_ip:
      result = self.rpc.call_node_has_ip_address(new_node.name,
                                                 new_node.secondary_ip)
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
                   prereq=True)
      if not result.payload:
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
                                 " you gave (%s). Please fix and re-run this"
                                 " command." % new_node.secondary_ip)

    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      'nodelist': [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload['nodelist']
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed %s -> %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed.")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.RemoteFailMsg()
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
      self.context.AddNode(new_node)


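# Illustrative sketch, not part of the original module and not called by any
# LU: it isolates the master-candidate arithmetic from LUAddNode.CheckPrereq
# above -- the incoming node raises the reachable pool ceiling by one, capped
# at the configured candidate_pool_size, and the node is promoted only if the
# current candidate count is still below that ceiling.  The helper name is
# hypothetical.
def _ExampleWillBeMasterCandidate(mc_now, mc_max, cp_size):
  """Return whether a newly added node should become a master candidate."""
  mc_max = min(mc_max + 1, cp_size)
  return mc_now < mc_max

# With a pool size of 10, two current candidates and a current ceiling of 2,
# the new node is promoted: _ExampleWillBeMasterCandidate(2, 2, 10) -> True.

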
class LUSetNodeParams(LogicalUnit):
2858
  """Modifies the parameters of a node.
2859

2860
  """
2861
  HPATH = "node-modify"
2862
  HTYPE = constants.HTYPE_NODE
2863
  _OP_REQP = ["node_name"]
2864
  REQ_BGL = False
2865

    
2866
  def CheckArguments(self):
2867
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2868
    if node_name is None:
2869
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2870
    self.op.node_name = node_name
2871
    _CheckBooleanOpField(self.op, 'master_candidate')
2872
    _CheckBooleanOpField(self.op, 'offline')
2873
    _CheckBooleanOpField(self.op, 'drained')
2874
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
2875
    if all_mods.count(None) == 3:
2876
      raise errors.OpPrereqError("Please pass at least one modification")
2877
    if all_mods.count(True) > 1:
2878
      raise errors.OpPrereqError("Can't set the node into more than one"
2879
                                 " state at the same time")
2880

    
2881
  def ExpandNames(self):
2882
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2883

    
2884
  def BuildHooksEnv(self):
2885
    """Build hooks env.
2886

2887
    This runs on the master node.
2888

2889
    """
2890
    env = {
2891
      "OP_TARGET": self.op.node_name,
2892
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2893
      "OFFLINE": str(self.op.offline),
2894
      "DRAINED": str(self.op.drained),
2895
      }
2896
    nl = [self.cfg.GetMasterNode(),
2897
          self.op.node_name]
2898
    return env, nl, nl
2899

    
2900
  def CheckPrereq(self):
2901
    """Check prerequisites.
2902

2903
    This only checks the instance list against the existing names.
2904

2905
    """
2906
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2907

    
2908
    if ((self.op.master_candidate == False or self.op.offline == True or
2909
         self.op.drained == True) and node.master_candidate):
2910
      # we will demote the node from master_candidate
2911
      if self.op.node_name == self.cfg.GetMasterNode():
2912
        raise errors.OpPrereqError("The master node has to be a"
2913
                                   " master candidate, online and not drained")
2914
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2915
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2916
      if num_candidates <= cp_size:
2917
        msg = ("Not enough master candidates (desired"
2918
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2919
        if self.op.force:
2920
          self.LogWarning(msg)
2921
        else:
2922
          raise errors.OpPrereqError(msg)
2923

    
2924
    if (self.op.master_candidate == True and
2925
        ((node.offline and not self.op.offline == False) or
2926
         (node.drained and not self.op.drained == False))):
2927
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2928
                                 " to master_candidate" % node.name)
2929

    
2930
    return
2931

    
2932
  def Exec(self, feedback_fn):
2933
    """Modifies a node.
2934

2935
    """
2936
    node = self.node
2937

    
2938
    result = []
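    # Each applied change is recorded as a (parameter, new value) pair,
    # e.g. ("offline", "True") or ("master_candidate", "auto-demotion
    # due to drain"); the accumulated list is returned by Exec.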
2939
    changed_mc = False
2940

    
2941
    if self.op.offline is not None:
2942
      node.offline = self.op.offline
2943
      result.append(("offline", str(self.op.offline)))
2944
      if self.op.offline == True:
2945
        if node.master_candidate:
2946
          node.master_candidate = False
2947
          changed_mc = True
2948
          result.append(("master_candidate", "auto-demotion due to offline"))
2949
        if node.drained:
2950
          node.drained = False
2951
          result.append(("drained", "clear drained status due to offline"))
2952

    
2953
    if self.op.master_candidate is not None:
2954
      node.master_candidate = self.op.master_candidate
2955
      changed_mc = True
2956
      result.append(("master_candidate", str(self.op.master_candidate)))
2957
      if self.op.master_candidate == False:
2958
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2959
        msg = rrc.fail_msg
2960
        if msg:
2961
          self.LogWarning("Node failed to demote itself: %s" % msg)
2962

    
2963
    if self.op.drained is not None:
2964
      node.drained = self.op.drained
2965
      result.append(("drained", str(self.op.drained)))
2966
      if self.op.drained == True:
2967
        if node.master_candidate:
2968
          node.master_candidate = False
2969
          changed_mc = True
2970
          result.append(("master_candidate", "auto-demotion due to drain"))
2971
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2972
          msg = rrc.RemoteFailMsg()
2973
          if msg:
2974
            self.LogWarning("Node failed to demote itself: %s" % msg)
2975
        if node.offline:
2976
          node.offline = False
2977
          result.append(("offline", "clear offline status due to drain"))
2978

    
2979
    # this will trigger configuration file update, if needed
2980
    self.cfg.Update(node)
2981
    # this will trigger job queue propagation or cleanup
2982
    if changed_mc:
2983
      self.context.ReaddNode(node)
2984

    
2985
    return result


class LUPowercycleNode(NoHooksLU):
2989
  """Powercycles a node.
2990

2991
  """
2992
  _OP_REQP = ["node_name", "force"]
2993
  REQ_BGL = False
2994

    
2995
  def CheckArguments(self):
2996
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2997
    if node_name is None:
2998
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2999
    self.op.node_name = node_name
3000
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
3001
      raise errors.OpPrereqError("The node is the master and the force"
3002
                                 " parameter was not set")
3003

    
3004
  def ExpandNames(self):
3005
    """Locking for PowercycleNode.
3006

3007
    This is a last-resort option and shouldn't block on other
3008
    jobs. Therefore, we grab no locks.
3009

3010
    """
3011
    self.needed_locks = {}
3012

    
3013
  def CheckPrereq(self):
3014
    """Check prerequisites.
3015

3016
    This LU has no prereqs.
3017

3018
    """
3019
    pass
3020

    
3021
  def Exec(self, feedback_fn):
3022
    """Reboots a node.
3023

3024
    """
3025
    result = self.rpc.call_node_powercycle(self.op.node_name,
3026
                                           self.cfg.GetHypervisorType())
3027
    result.Raise("Failed to schedule the reboot")
3028
    return result.payload


class LUQueryClusterInfo(NoHooksLU):
3032
  """Query cluster configuration.
3033

3034
  """
3035
  _OP_REQP = []
3036
  REQ_BGL = False
3037

    
3038
  def ExpandNames(self):
3039
    self.needed_locks = {}
3040

    
3041
  def CheckPrereq(self):
3042
    """No prerequsites needed for this LU.
3043

3044
    """
3045
    pass
3046

    
3047
  def Exec(self, feedback_fn):
3048
    """Return cluster config.
3049

3050
    """
3051
    cluster = self.cfg.GetClusterInfo()
3052
    result = {
3053
      "software_version": constants.RELEASE_VERSION,
3054
      "protocol_version": constants.PROTOCOL_VERSION,
3055
      "config_version": constants.CONFIG_VERSION,
3056
      "os_api_version": max(constants.OS_API_VERSIONS),
3057
      "export_version": constants.EXPORT_VERSION,
3058
      "architecture": (platform.architecture()[0], platform.machine()),
3059
      "name": cluster.cluster_name,
3060
      "master": cluster.master_node,
3061
      "default_hypervisor": cluster.enabled_hypervisors[0],
3062
      "enabled_hypervisors": cluster.enabled_hypervisors,
3063
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3064
                        for hypervisor_name in cluster.enabled_hypervisors]),
3065
      "beparams": cluster.beparams,
3066
      "nicparams": cluster.nicparams,
3067
      "candidate_pool_size": cluster.candidate_pool_size,
3068
      "master_netdev": cluster.master_netdev,
3069
      "volume_group_name": cluster.volume_group_name,
3070
      "file_storage_dir": cluster.file_storage_dir,
3071
      "ctime": cluster.ctime,
3072
      "mtime": cluster.mtime,
3073
      }
3074

    
3075
    return result


class LUQueryConfigValues(NoHooksLU):
3079
  """Return configuration values.
3080

3081
  """
3082
  _OP_REQP = []
3083
  REQ_BGL = False
3084
  _FIELDS_DYNAMIC = utils.FieldSet()
3085
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag")
3086

    
3087
  def ExpandNames(self):
3088
    self.needed_locks = {}
3089

    
3090
    _CheckOutputFields(static=self._FIELDS_STATIC,
3091
                       dynamic=self._FIELDS_DYNAMIC,
3092
                       selected=self.op.output_fields)
3093

    
3094
  def CheckPrereq(self):
3095
    """No prerequisites.
3096

3097
    """
3098
    pass
3099

    
3100
  def Exec(self, feedback_fn):
3101
    """Dump a representation of the cluster config to the standard output.
3102

3103
    """
3104
    values = []
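    # For example, output_fields ["cluster_name", "drain_flag"] yields a
    # list like ["<cluster name>", False], in the same order as the
    # requested fields.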
3105
    for field in self.op.output_fields:
3106
      if field == "cluster_name":
3107
        entry = self.cfg.GetClusterName()
3108
      elif field == "master_node":
3109
        entry = self.cfg.GetMasterNode()
3110
      elif field == "drain_flag":
3111
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3112
      else:
3113
        raise errors.ParameterError(field)
3114
      values.append(entry)
3115
    return values


class LUActivateInstanceDisks(NoHooksLU):
3119
  """Bring up an instance's disks.
3120

3121
  """
3122
  _OP_REQP = ["instance_name"]
3123
  REQ_BGL = False
3124

    
3125
  def ExpandNames(self):
3126
    self._ExpandAndLockInstance()
3127
    self.needed_locks[locking.LEVEL_NODE] = []
3128
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3129

    
3130
  def DeclareLocks(self, level):
3131
    if level == locking.LEVEL_NODE:
3132
      self._LockInstancesNodes()
3133

    
3134
  def CheckPrereq(self):
3135
    """Check prerequisites.
3136

3137
    This checks that the instance is in the cluster.
3138

3139
    """
3140
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3141
    assert self.instance is not None, \
3142
      "Cannot retrieve locked instance %s" % self.op.instance_name
3143
    _CheckNodeOnline(self, self.instance.primary_node)
3144
    if not hasattr(self.op, "ignore_size"):
3145
      self.op.ignore_size = False
3146

    
3147
  def Exec(self, feedback_fn):
3148
    """Activate the disks.
3149

3150
    """
3151
    disks_ok, disks_info = \
3152
              _AssembleInstanceDisks(self, self.instance,
3153
                                     ignore_size=self.op.ignore_size)
3154
    if not disks_ok:
3155
      raise errors.OpExecError("Cannot activate block devices")
3156

    
3157
    return disks_info


def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3161
                           ignore_size=False):
3162
  """Prepare the block devices for an instance.
3163

3164
  This sets up the block devices on all nodes.
3165

3166
  @type lu: L{LogicalUnit}
3167
  @param lu: the logical unit on whose behalf we execute
3168
  @type instance: L{objects.Instance}
3169
  @param instance: the instance for whose disks we assemble
3170
  @type ignore_secondaries: boolean
3171
  @param ignore_secondaries: if true, errors on secondary nodes
3172
      won't result in an error return from the function
3173
  @type ignore_size: boolean
3174
  @param ignore_size: if true, the current known size of the disk
3175
      will not be used during the disk activation, useful for cases
3176
      when the size is wrong
3177
  @return: a tuple of (status, device_info); status is False if any
      disk could not be assembled (subject to ignore_secondaries),
      and device_info is a list of
      (host, instance_visible_name, node_visible_name)
      with the mapping from node devices to instance devices
3180

3181
  """
3182
  device_info = []
3183
  disks_ok = True
3184
  iname = instance.name
3185
  # With the two passes mechanism we try to reduce the window of
3186
  # opportunity for the race condition of switching DRBD to primary
3187
  # before handshaking occurred, but we do not eliminate it
3188

    
3189
  # The proper fix would be to wait (with some limits) until the
3190
  # connection has been made and drbd transitions from WFConnection
3191
  # into any other network-connected state (Connected, SyncTarget,
3192
  # SyncSource, etc.)
3193

    
3194
  # 1st pass, assemble on all nodes in secondary mode
3195
  for inst_disk in instance.disks:
3196
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3197
      if ignore_size:
3198
        node_disk = node_disk.Copy()
3199
        node_disk.UnsetSize()
3200
      lu.cfg.SetDiskID(node_disk, node)
3201
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3202
      msg = result.fail_msg
3203
      if msg:
3204
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3205
                           " (is_primary=False, pass=1): %s",
3206
                           inst_disk.iv_name, node, msg)
3207
        if not ignore_secondaries:
3208
          disks_ok = False
3209

    
3210
  # FIXME: race condition on drbd migration to primary
3211

    
3212
  # 2nd pass, do only the primary node
3213
  for inst_disk in instance.disks:
3214
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3215
      if node != instance.primary_node:
3216
        continue
3217
      if ignore_size:
3218
        node_disk = node_disk.Copy()
3219
        node_disk.UnsetSize()
3220
      lu.cfg.SetDiskID(node_disk, node)
3221
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3222
      msg = result.fail_msg
3223
      if msg:
3224
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3225
                           " (is_primary=True, pass=2): %s",
3226
                           inst_disk.iv_name, node, msg)
3227
        disks_ok = False
3228
    device_info.append((instance.primary_node, inst_disk.iv_name,
3229
                        result.payload))
3230

    
3231
  # leave the disks configured for the primary node
3232
  # this is a workaround that would be fixed better by
3233
  # improving the logical/physical id handling
3234
  for disk in instance.disks:
3235
    lu.cfg.SetDiskID(disk, instance.primary_node)
3236

    
3237
  return disks_ok, device_info


def _StartInstanceDisks(lu, instance, force):
3241
  """Start the disks of an instance.
3242

3243
  """
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3245
                                           ignore_secondaries=force)
3246
  if not disks_ok:
3247
    _ShutdownInstanceDisks(lu, instance)
3248
    if force is not None and not force:
3249
      lu.proc.LogWarning("", hint="If the message above refers to a"
3250
                         " secondary node,"
3251
                         " you can retry the operation using '--force'.")
3252
    raise errors.OpExecError("Disk consistency error")


class LUDeactivateInstanceDisks(NoHooksLU):
3256
  """Shutdown an instance's disks.
3257

3258
  """
3259
  _OP_REQP = ["instance_name"]
3260
  REQ_BGL = False
3261

    
3262
  def ExpandNames(self):
3263
    self._ExpandAndLockInstance()
3264
    self.needed_locks[locking.LEVEL_NODE] = []
3265
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3266

    
3267
  def DeclareLocks(self, level):
3268
    if level == locking.LEVEL_NODE:
3269
      self._LockInstancesNodes()
3270

    
3271
  def CheckPrereq(self):
3272
    """Check prerequisites.
3273

3274
    This checks that the instance is in the cluster.
3275

3276
    """
3277
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3278
    assert self.instance is not None, \
3279
      "Cannot retrieve locked instance %s" % self.op.instance_name
3280

    
3281
  def Exec(self, feedback_fn):
3282
    """Deactivate the disks
3283

3284
    """
3285
    instance = self.instance
3286
    _SafeShutdownInstanceDisks(self, instance)


def _SafeShutdownInstanceDisks(lu, instance):
3290
  """Shutdown block devices of an instance.
3291

3292
  This function checks if an instance is running, before calling
3293
  _ShutdownInstanceDisks.
3294

3295
  """
3296
  pnode = instance.primary_node
3297
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3298
  ins_l.Raise("Can't contact node %s" % pnode)
3299

    
3300
  if instance.name in ins_l.payload:
3301
    raise errors.OpExecError("Instance is running, can't shutdown"
3302
                             " block devices.")
3303

    
3304
  _ShutdownInstanceDisks(lu, instance)


def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3308
  """Shutdown block devices of an instance.
3309

3310
  This does the shutdown on all nodes of the instance.
3311

3312
  If ignore_primary is false, errors on the primary node cause the
  function to return False; if it is true, they are ignored.
3314

3315
  """
3316
  all_result = True
3317
  for disk in instance.disks:
3318
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3319
      lu.cfg.SetDiskID(top_disk, node)
3320
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3321
      msg = result.fail_msg
3322
      if msg:
3323
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3324
                      disk.iv_name, node, msg)
3325
        if not ignore_primary or node != instance.primary_node:
3326
          all_result = False
3327
  return all_result


def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3331
  """Checks if a node has enough free memory.
3332

3333
  This function checks if a given node has the needed amount of free
  memory. In case the node has less memory or we cannot get the
  information from the node, this function raises an OpPrereqError
  exception.
3337

3338
  @type lu: C{LogicalUnit}
3339
  @param lu: a logical unit from which we get configuration data
3340
  @type node: C{str}
3341
  @param node: the node to check
3342
  @type reason: C{str}
3343
  @param reason: string to use in the error message
3344
  @type requested: C{int}
3345
  @param requested: the amount of memory in MiB to check for
3346
  @type hypervisor_name: C{str}
3347
  @param hypervisor_name: the hypervisor to ask for memory stats
3348
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3349
      we cannot check the node
3350

3351
  """
3352
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3353
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3354
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3355
  if not isinstance(free_mem, int):
3356
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3357
                               " was '%s'" % (node, free_mem))
3358
  if requested > free_mem:
3359
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3360
                               " needed %s MiB, available %s MiB" %
3361
                               (node, reason, requested, free_mem))
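
# Example usage (as done in LUStartupInstance.CheckPrereq below): verify
# that the primary node can hold the instance's configured memory before
# starting it:
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)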


class LUStartupInstance(LogicalUnit):
3365
  """Starts an instance.
3366

3367
  """
3368
  HPATH = "instance-start"
3369
  HTYPE = constants.HTYPE_INSTANCE
3370
  _OP_REQP = ["instance_name", "force"]
3371
  REQ_BGL = False
3372

    
3373
  def ExpandNames(self):
3374
    self._ExpandAndLockInstance()
3375

    
3376
  def BuildHooksEnv(self):
3377
    """Build hooks env.
3378

3379
    This runs on master, primary and secondary nodes of the instance.
3380

3381
    """
3382
    env = {
3383
      "FORCE": self.op.force,
3384
      }
3385
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3386
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3387
    return env, nl, nl
3388

    
3389
  def CheckPrereq(self):
3390
    """Check prerequisites.
3391

3392
    This checks that the instance is in the cluster.
3393

3394
    """
3395
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3396
    assert self.instance is not None, \
3397
      "Cannot retrieve locked instance %s" % self.op.instance_name
3398

    
3399
    # extra beparams
3400
    self.beparams = getattr(self.op, "beparams", {})
3401
    if self.beparams:
3402
      if not isinstance(self.beparams, dict):
3403
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3404
                                   " dict" % (type(self.beparams), ))
3405
      # fill the beparams dict
3406
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3407
      self.op.beparams = self.beparams
3408

    
3409
    # extra hvparams
3410
    self.hvparams = getattr(self.op, "hvparams", {})
3411
    if self.hvparams:
3412
      if not isinstance(self.hvparams, dict):
3413
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3414
                                   " dict" % (type(self.hvparams), ))
3415

    
3416
      # check hypervisor parameter syntax (locally)
3417
      cluster = self.cfg.GetClusterInfo()
3418
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3419
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3420
                                    instance.hvparams)
3421
      filled_hvp.update(self.hvparams)
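      # Resulting precedence, lowest to highest: cluster-level defaults
      # for this hypervisor, the instance's own hvparams, and finally
      # the one-off overrides passed with this start request.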
3422
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3423
      hv_type.CheckParameterSyntax(filled_hvp)
3424
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3425
      self.op.hvparams = self.hvparams
3426

    
3427
    _CheckNodeOnline(self, instance.primary_node)
3428

    
3429
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3430
    # check bridges existence
3431
    _CheckInstanceBridgesExist(self, instance)
3432

    
3433
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3434
                                              instance.name,
3435
                                              instance.hypervisor)
3436
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3437
                      prereq=True)
3438
    if not remote_info.payload: # not running already
3439
      _CheckNodeFreeMemory(self, instance.primary_node,
3440
                           "starting instance %s" % instance.name,
3441
                           bep[constants.BE_MEMORY], instance.hypervisor)
3442

    
3443
  def Exec(self, feedback_fn):
3444
    """Start the instance.
3445

3446
    """
3447
    instance = self.instance
3448
    force = self.op.force
3449

    
3450
    self.cfg.MarkInstanceUp(instance.name)
3451

    
3452
    node_current = instance.primary_node
3453

    
3454
    _StartInstanceDisks(self, instance, force)
3455

    
3456
    result = self.rpc.call_instance_start(node_current, instance,
3457
                                          self.hvparams, self.beparams)
3458
    msg = result.fail_msg
3459
    if msg:
3460
      _ShutdownInstanceDisks(self, instance)
3461
      raise errors.OpExecError("Could not start instance: %s" % msg)


class LURebootInstance(LogicalUnit):
3465
  """Reboot an instance.
3466

3467
  """
3468
  HPATH = "instance-reboot"
3469
  HTYPE = constants.HTYPE_INSTANCE
3470
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3471
  REQ_BGL = False
3472

    
3473
  def ExpandNames(self):
3474
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3475
                                   constants.INSTANCE_REBOOT_HARD,
3476
                                   constants.INSTANCE_REBOOT_FULL]:
3477
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3478
                                  (constants.INSTANCE_REBOOT_SOFT,
3479
                                   constants.INSTANCE_REBOOT_HARD,
3480
                                   constants.INSTANCE_REBOOT_FULL))
3481
    self._ExpandAndLockInstance()
3482

    
3483
  def BuildHooksEnv(self):
3484
    """Build hooks env.
3485

3486
    This runs on master, primary and secondary nodes of the instance.
3487

3488
    """
3489
    env = {
3490
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3491
      "REBOOT_TYPE": self.op.reboot_type,
3492
      }
3493
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3494
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3495
    return env, nl, nl
3496

    
3497
  def CheckPrereq(self):
3498
    """Check prerequisites.
3499

3500
    This checks that the instance is in the cluster.
3501

3502
    """
3503
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3504
    assert self.instance is not None, \
3505
      "Cannot retrieve locked instance %s" % self.op.instance_name
3506

    
3507
    _CheckNodeOnline(self, instance.primary_node)
3508

    
3509
    # check bridges existence
3510
    _CheckInstanceBridgesExist(self, instance)
3511

    
3512
  def Exec(self, feedback_fn):
3513
    """Reboot the instance.
3514

3515
    """
3516
    instance = self.instance
3517
    ignore_secondaries = self.op.ignore_secondaries
3518
    reboot_type = self.op.reboot_type
3519

    
3520
    node_current = instance.primary_node
3521

    
3522
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3523
                       constants.INSTANCE_REBOOT_HARD]:
3524
      for disk in instance.disks:
3525
        self.cfg.SetDiskID(disk, node_current)
3526
      result = self.rpc.call_instance_reboot(node_current, instance,
3527
                                             reboot_type)
3528
      result.Raise("Could not reboot instance")
3529
    else:
3530
      result = self.rpc.call_instance_shutdown(node_current, instance)
3531
      result.Raise("Could not shutdown instance for full reboot")
3532
      _ShutdownInstanceDisks(self, instance)
3533
      _StartInstanceDisks(self, instance, ignore_secondaries)
3534
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3535
      msg = result.fail_msg
3536
      if msg:
3537
        _ShutdownInstanceDisks(self, instance)
3538
        raise errors.OpExecError("Could not start instance for"
3539
                                 " full reboot: %s" % msg)
3540

    
3541
    self.cfg.MarkInstanceUp(instance.name)


class LUShutdownInstance(LogicalUnit):
3545
  """Shutdown an instance.
3546

3547
  """
3548
  HPATH = "instance-stop"
3549
  HTYPE = constants.HTYPE_INSTANCE
3550
  _OP_REQP = ["instance_name"]
3551
  REQ_BGL = False
3552

    
3553
  def ExpandNames(self):
3554
    self._ExpandAndLockInstance()
3555

    
3556
  def BuildHooksEnv(self):
3557
    """Build hooks env.
3558

3559
    This runs on master, primary and secondary nodes of the instance.
3560

3561
    """
3562
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3563
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3564
    return env, nl, nl
3565

    
3566
  def CheckPrereq(self):
3567
    """Check prerequisites.
3568

3569
    This checks that the instance is in the cluster.
3570

3571
    """
3572
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3573
    assert self.instance is not None, \
3574
      "Cannot retrieve locked instance %s" % self.op.instance_name
3575
    _CheckNodeOnline(self, self.instance.primary_node)
3576

    
3577
  def Exec(self, feedback_fn):
3578
    """Shutdown the instance.
3579

3580
    """
3581
    instance = self.instance
3582
    node_current = instance.primary_node
3583
    self.cfg.MarkInstanceDown(instance.name)
3584
    result = self.rpc.call_instance_shutdown(node_current, instance)
3585
    msg = result.fail_msg
3586
    if msg:
3587
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3588

    
3589
    _ShutdownInstanceDisks(self, instance)


class LUReinstallInstance(LogicalUnit):
3593
  """Reinstall an instance.
3594

3595
  """
3596
  HPATH = "instance-reinstall"
3597
  HTYPE = constants.HTYPE_INSTANCE
3598
  _OP_REQP = ["instance_name"]
3599
  REQ_BGL = False
3600

    
3601
  def ExpandNames(self):
3602
    self._ExpandAndLockInstance()
3603

    
3604
  def BuildHooksEnv(self):
3605
    """Build hooks env.
3606

3607
    This runs on master, primary and secondary nodes of the instance.
3608

3609
    """
3610
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3611
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3612
    return env, nl, nl
3613

    
3614
  def CheckPrereq(self):
3615
    """Check prerequisites.
3616

3617
    This checks that the instance is in the cluster and is not running.
3618

3619
    """
3620
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3621
    assert instance is not None, \
3622
      "Cannot retrieve locked instance %s" % self.op.instance_name
3623
    _CheckNodeOnline(self, instance.primary_node)
3624

    
3625
    if instance.disk_template == constants.DT_DISKLESS:
3626
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3627
                                 self.op.instance_name)
3628
    if instance.admin_up:
3629
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3630
                                 self.op.instance_name)
3631
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3632
                                              instance.name,
3633
                                              instance.hypervisor)
3634
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3635
                      prereq=True)
3636
    if remote_info.payload:
3637
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3638
                                 (self.op.instance_name,
3639
                                  instance.primary_node))
3640

    
3641
    self.op.os_type = getattr(self.op, "os_type", None)
3642
    if self.op.os_type is not None:
3643
      # OS verification
3644
      pnode = self.cfg.GetNodeInfo(
3645
        self.cfg.ExpandNodeName(instance.primary_node))
3646
      if pnode is None:
3647
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3648
                                   self.op.pnode)
3649
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3650
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3651
                   (self.op.os_type, pnode.name), prereq=True)
3652

    
3653
    self.instance = instance
3654

    
3655
  def Exec(self, feedback_fn):
3656
    """Reinstall the instance.
3657

3658
    """
3659
    inst = self.instance
3660

    
3661
    if self.op.os_type is not None:
3662
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3663
      inst.os = self.op.os_type
3664
      self.cfg.Update(inst)
3665

    
3666
    _StartInstanceDisks(self, inst, None)
3667
    try:
3668
      feedback_fn("Running the instance OS create scripts...")
3669
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3670
      result.Raise("Could not install OS for instance %s on node %s" %
3671
                   (inst.name, inst.primary_node))
3672
    finally:
3673
      _ShutdownInstanceDisks(self, inst)


class LURecreateInstanceDisks(LogicalUnit):
3677
  """Recreate an instance's missing disks.
3678

3679
  """
3680
  HPATH = "instance-recreate-disks"
3681
  HTYPE = constants.HTYPE_INSTANCE
3682
  _OP_REQP = ["instance_name", "disks"]
3683
  REQ_BGL = False
3684

    
3685
  def CheckArguments(self):
3686
    """Check the arguments.
3687

3688
    """
3689
    if not isinstance(self.op.disks, list):
3690
      raise errors.OpPrereqError("Invalid disks parameter")
3691
    for item in self.op.disks:
3692
      if (not isinstance(item, int) or
3693
          item < 0):
3694
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3695
                                   str(item))
3696

    
3697
  def ExpandNames(self):
3698
    self._ExpandAndLockInstance()
3699

    
3700
  def BuildHooksEnv(self):
3701
    """Build hooks env.
3702

3703
    This runs on master, primary and secondary nodes of the instance.
3704

3705
    """
3706
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3707
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3708
    return env, nl, nl
3709

    
3710
  def CheckPrereq(self):
3711
    """Check prerequisites.
3712

3713
    This checks that the instance is in the cluster and is not running.
3714

3715
    """
3716
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3717
    assert instance is not None, \
3718
      "Cannot retrieve locked instance %s" % self.op.instance_name
3719
    _CheckNodeOnline(self, instance.primary_node)
3720

    
3721
    if instance.disk_template == constants.DT_DISKLESS:
3722
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3723
                                 self.op.instance_name)
3724
    if instance.admin_up:
3725
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3726
                                 self.op.instance_name)
3727
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3728
                                              instance.name,
3729
                                              instance.hypervisor)
3730
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3731
                      prereq=True)
3732
    if remote_info.payload:
3733
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3734
                                 (self.op.instance_name,
3735
                                  instance.primary_node))
3736

    
3737
    if not self.op.disks:
3738
      self.op.disks = range(len(instance.disks))
3739
    else:
3740
      for idx in self.op.disks:
3741
        if idx >= len(instance.disks):
3742
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
3743

    
3744
    self.instance = instance
3745

    
3746
  def Exec(self, feedback_fn):
3747
    """Recreate the disks.
3748

3749
    """
3750
    to_skip = []
3751
    for idx, disk in enumerate(self.instance.disks):
3752
      if idx not in self.op.disks: # disk idx has not been passed in
3753
        to_skip.append(idx)
3754
        continue
3755

    
3756
    _CreateDisks(self, self.instance, to_skip=to_skip)


class LURenameInstance(LogicalUnit):
3760
  """Rename an instance.
3761

3762
  """
3763
  HPATH = "instance-rename"
3764
  HTYPE = constants.HTYPE_INSTANCE
3765
  _OP_REQP = ["instance_name", "new_name"]
3766

    
3767
  def BuildHooksEnv(self):
3768
    """Build hooks env.
3769

3770
    This runs on master, primary and secondary nodes of the instance.
3771

3772
    """
3773
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3774
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3775
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3776
    return env, nl, nl
3777

    
3778
  def CheckPrereq(self):
3779
    """Check prerequisites.
3780

3781
    This checks that the instance is in the cluster and is not running.
3782

3783
    """
3784
    instance = self.cfg.GetInstanceInfo(
3785
      self.cfg.ExpandInstanceName(self.op.instance_name))
3786
    if instance is None:
3787
      raise errors.OpPrereqError("Instance '%s' not known" %
3788
                                 self.op.instance_name)
3789
    _CheckNodeOnline(self, instance.primary_node)
3790

    
3791
    if instance.admin_up:
3792
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3793
                                 self.op.instance_name)
3794
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3795
                                              instance.name,
3796
                                              instance.hypervisor)
3797
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3798
                      prereq=True)
3799
    if remote_info.payload:
3800
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3801
                                 (self.op.instance_name,
3802
                                  instance.primary_node))
3803
    self.instance = instance
3804

    
3805
    # new name verification
3806
    name_info = utils.HostInfo(self.op.new_name)
3807

    
3808
    self.op.new_name = new_name = name_info.name
3809
    instance_list = self.cfg.GetInstanceList()
3810
    if new_name in instance_list:
3811
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3812
                                 new_name)
3813

    
3814
    if not getattr(self.op, "ignore_ip", False):
3815
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3816
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3817
                                   (name_info.ip, new_name))
3818

    
3819

    
3820
  def Exec(self, feedback_fn):
3821
    """Reinstall the instance.
3822

3823
    """
3824
    inst = self.instance
3825
    old_name = inst.name
3826

    
3827
    if inst.disk_template == constants.DT_FILE:
3828
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3829

    
3830
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3831
    # Change the instance lock. This is definitely safe while we hold the BGL
3832
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3833
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3834

    
3835
    # re-read the instance from the configuration after rename
3836
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3837

    
3838
    if inst.disk_template == constants.DT_FILE:
3839
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3840
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3841
                                                     old_file_storage_dir,
3842
                                                     new_file_storage_dir)
3843
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3844
                   " (but the instance has been renamed in Ganeti)" %
3845
                   (inst.primary_node, old_file_storage_dir,
3846
                    new_file_storage_dir))
3847

    
3848
    _StartInstanceDisks(self, inst, None)
3849
    try:
3850
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3851
                                                 old_name)
3852
      msg = result.fail_msg
3853
      if msg:
3854
        msg = ("Could not run OS rename script for instance %s on node %s"
3855
               " (but the instance has been renamed in Ganeti): %s" %
3856
               (inst.name, inst.primary_node, msg))
3857
        self.proc.LogWarning(msg)
3858
    finally:
3859
      _ShutdownInstanceDisks(self, inst)


class LURemoveInstance(LogicalUnit):
3863
  """Remove an instance.
3864

3865
  """
3866
  HPATH = "instance-remove"
3867
  HTYPE = constants.HTYPE_INSTANCE
3868
  _OP_REQP = ["instance_name", "ignore_failures"]
3869
  REQ_BGL = False
3870

    
3871
  def ExpandNames(self):
3872
    self._ExpandAndLockInstance()
3873
    self.needed_locks[locking.LEVEL_NODE] = []
3874
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3875

    
3876
  def DeclareLocks(self, level):
3877
    if level == locking.LEVEL_NODE:
3878
      self._LockInstancesNodes()
3879

    
3880
  def BuildHooksEnv(self):
3881
    """Build hooks env.
3882

3883
    This runs on master, primary and secondary nodes of the instance.
3884

3885
    """
3886
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3887
    nl = [self.cfg.GetMasterNode()]
3888
    return env, nl, nl
3889

    
3890
  def CheckPrereq(self):
3891
    """Check prerequisites.
3892

3893
    This checks that the instance is in the cluster.
3894

3895
    """
3896
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3897
    assert self.instance is not None, \
3898
      "Cannot retrieve locked instance %s" % self.op.instance_name
3899

    
3900
  def Exec(self, feedback_fn):
3901
    """Remove the instance.
3902

3903
    """
3904
    instance = self.instance
3905
    logging.info("Shutting down instance %s on node %s",
3906
                 instance.name, instance.primary_node)
3907

    
3908
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3909
    msg = result.fail_msg
3910
    if msg:
3911
      if self.op.ignore_failures:
3912
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3913
      else:
3914
        raise errors.OpExecError("Could not shutdown instance %s on"
3915
                                 " node %s: %s" %
3916
                                 (instance.name, instance.primary_node, msg))
3917

    
3918
    logging.info("Removing block devices for instance %s", instance.name)
3919

    
3920
    if not _RemoveDisks(self, instance):
3921
      if self.op.ignore_failures:
3922
        feedback_fn("Warning: can't remove instance's disks")
3923
      else:
3924
        raise errors.OpExecError("Can't remove instance's disks")
3925

    
3926
    logging.info("Removing instance %s out of cluster config", instance.name)
3927

    
3928
    self.cfg.RemoveInstance(instance.name)
3929
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name


class LUQueryInstances(NoHooksLU):
3933
  """Logical unit for querying instances.
3934

3935
  """
3936
  _OP_REQP = ["output_fields", "names", "use_locking"]
3937
  REQ_BGL = False
3938
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3939
                                    "admin_state",
3940
                                    "disk_template", "ip", "mac", "bridge",
3941
                                    "nic_mode", "nic_link",
3942
                                    "sda_size", "sdb_size", "vcpus", "tags",
3943
                                    "network_port", "beparams",
3944
                                    r"(disk)\.(size)/([0-9]+)",
3945
                                    r"(disk)\.(sizes)", "disk_usage",
3946
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3947
                                    r"(nic)\.(bridge)/([0-9]+)",
3948
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
3949
                                    r"(disk|nic)\.(count)",
3950
                                    "serial_no", "hypervisor", "hvparams",
3951
                                    "ctime", "mtime",
3952
                                    ] +
3953
                                  ["hv/%s" % name
3954
                                   for name in constants.HVS_PARAMETERS] +
3955
                                  ["be/%s" % name
3956
                                   for name in constants.BES_PARAMETERS])
3957
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
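  # Examples of field names matched by the patterns above (illustrative):
  #   "disk.size/0" - size of the first disk
  #   "disk.sizes"  - list of all disk sizes
  #   "nic.mac/1"   - MAC address of the second NIC
  #   "nic.bridges" - per-NIC bridge list (None for non-bridged NICs)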


  def ExpandNames(self):
3961
    _CheckOutputFields(static=self._FIELDS_STATIC,
3962
                       dynamic=self._FIELDS_DYNAMIC,
3963
                       selected=self.op.output_fields)
3964

    
3965
    self.needed_locks = {}
3966
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3967
    self.share_locks[locking.LEVEL_NODE] = 1
3968

    
3969
    if self.op.names:
3970
      self.wanted = _GetWantedInstances(self, self.op.names)
3971
    else:
3972
      self.wanted = locking.ALL_SET
3973

    
3974
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3975
    self.do_locking = self.do_node_query and self.op.use_locking
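    # Live data from the nodes is only needed when at least one requested
    # field is outside the static set (e.g. "oper_state" or "oper_ram");
    # locking is then taken only if the caller also asked for it.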
3976
    if self.do_locking:
3977
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3978
      self.needed_locks[locking.LEVEL_NODE] = []
3979
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3980

    
3981
  def DeclareLocks(self, level):
3982
    if level == locking.LEVEL_NODE and self.do_locking:
3983
      self._LockInstancesNodes()
3984

    
3985
  def CheckPrereq(self):
3986
    """Check prerequisites.
3987

3988
    """
3989
    pass
3990

    
3991
  def Exec(self, feedback_fn):
3992
    """Computes the list of nodes and their attributes.
3993

3994
    """
3995
    all_info = self.cfg.GetAllInstancesInfo()
3996
    if self.wanted == locking.ALL_SET:
3997
      # caller didn't specify instance names, so ordering is not important
3998
      if self.do_locking:
3999
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4000
      else:
4001
        instance_names = all_info.keys()
4002
      instance_names = utils.NiceSort(instance_names)
4003
    else:
4004
      # caller did specify names, so we must keep the ordering
4005
      if self.do_locking:
4006
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4007
      else:
4008
        tgt_set = all_info.keys()
4009
      missing = set(self.wanted).difference(tgt_set)
4010
      if missing:
4011
        raise errors.OpExecError("Some instances were removed before"
4012
                                 " retrieving their data: %s" % missing)
4013
      instance_names = self.wanted
4014

    
4015
    instance_list = [all_info[iname] for iname in instance_names]
4016

    
4017
    # begin data gathering
4018

    
4019
    nodes = frozenset([inst.primary_node for inst in instance_list])
4020
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4021

    
4022
    bad_nodes = []
4023
    off_nodes = []
4024
    if self.do_node_query:
4025
      live_data = {}
4026
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4027
      for name in nodes:
4028
        result = node_data[name]
4029
        if result.offline:
4030
          # offline nodes will be in both lists
4031
          off_nodes.append(name)
4032
        if result.failed or result.fail_msg:
4033
          bad_nodes.append(name)
4034
        else:
4035
          if result.payload:
4036
            live_data.update(result.payload)
4037
          # else no instance is alive
4038
    else:
4039
      live_data = dict([(name, {}) for name in instance_names])
4040

    
4041
    # end data gathering
4042

    
4043
    HVPREFIX = "hv/"
4044
    BEPREFIX = "be/"
4045
    output = []
4046
    cluster = self.cfg.GetClusterInfo()
4047
    for instance in instance_list:
4048
      iout = []
4049
      i_hv = cluster.FillHV(instance)
4050
      i_be = cluster.FillBE(instance)
4051
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4052
                                 nic.nicparams) for nic in instance.nics]
4053
      for field in self.op.output_fields:
4054
        st_match = self._FIELDS_STATIC.Matches(field)
4055
        if field == "name":
4056
          val = instance.name
4057
        elif field == "os":
4058
          val = instance.os
4059
        elif field == "pnode":
4060
          val = instance.primary_node
4061
        elif field == "snodes":
4062
          val = list(instance.secondary_nodes)
4063
        elif field == "admin_state":
4064
          val = instance.admin_up
4065
        elif field == "oper_state":
4066
          if instance.primary_node in bad_nodes:
4067
            val = None
4068
          else:
4069
            val = bool(live_data.get(instance.name))
4070
        elif field == "status":
4071
          if instance.primary_node in off_nodes:
4072
            val = "ERROR_nodeoffline"
4073
          elif instance.primary_node in bad_nodes:
4074
            val = "ERROR_nodedown"
4075
          else:
4076
            running = bool(live_data.get(instance.name))
4077
            if running:
4078
              if instance.admin_up:
4079
                val = "running"
4080
              else:
4081
                val = "ERROR_up"
4082
            else:
4083
              if instance.admin_up:
4084
                val = "ERROR_down"
4085
              else:
4086
                val = "ADMIN_down"
4087
        elif field == "oper_ram":
4088
          if instance.primary_node in bad_nodes:
4089
            val = None
4090
          elif instance.name in live_data:
4091
            val = live_data[instance.name].get("memory", "?")
4092
          else:
4093
            val = "-"
4094
        elif field == "vcpus":
4095
          val = i_be[constants.BE_VCPUS]
4096
        elif field == "disk_template":
4097
          val = instance.disk_template
4098
        elif field == "ip":
4099
          if instance.nics:
4100
            val = instance.nics[0].ip
4101
          else:
4102
            val = None
4103
        elif field == "nic_mode":
4104
          if instance.nics:
4105
            val = i_nicp[0][constants.NIC_MODE]
4106
          else:
4107
            val = None
4108
        elif field == "nic_link":
4109
          if instance.nics:
4110
            val = i_nicp[0][constants.NIC_LINK]
4111
          else:
4112
            val = None
4113
        elif field == "bridge":
4114
          if (instance.nics and
4115
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4116
            val = i_nicp[0][constants.NIC_LINK]
4117
          else:
4118
            val = None
4119
        elif field == "mac":
4120
          if instance.nics:
4121
            val = instance.nics[0].mac
4122
          else:
4123
            val = None
4124
        elif field == "sda_size" or field == "sdb_size":
4125
          idx = ord(field[2]) - ord('a')
4126
          try:
4127
            val = instance.FindDisk(idx).size
4128
          except errors.OpPrereqError:
4129
            val = None
4130
        elif field == "disk_usage": # total disk usage per node
4131
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4132
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4133
        elif field == "tags":
4134
          val = list(instance.GetTags())
4135
        elif field == "serial_no":
4136
          val = instance.serial_no
4137
        elif field == "ctime":
4138
          val = instance.ctime
4139
        elif field == "mtime":
4140
          val = instance.mtime
4141
        elif field == "network_port":
4142
          val = instance.network_port
4143
        elif field == "hypervisor":
4144
          val = instance.hypervisor
4145
        elif field == "hvparams":
4146
          val = i_hv
4147
        elif (field.startswith(HVPREFIX) and
4148
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4149
          val = i_hv.get(field[len(HVPREFIX):], None)
4150
        elif field == "beparams":
4151
          val = i_be
4152
        elif (field.startswith(BEPREFIX) and
4153
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4154
          val = i_be.get(field[len(BEPREFIX):], None)
4155
        elif st_match and st_match.groups():
4156
          # matches a variable list
4157
          st_groups = st_match.groups()
4158
          if st_groups and st_groups[0] == "disk":
4159
            if st_groups[1] == "count":
4160
              val = len(instance.disks)
4161
            elif st_groups[1] == "sizes":
4162
              val = [disk.size for disk in instance.disks]
4163
            elif st_groups[1] == "size":
4164
              try:
4165
                val = instance.FindDisk(st_groups[2]).size
4166
              except errors.OpPrereqError:
4167
                val = None
4168
            else:
4169
              assert False, "Unhandled disk parameter"
4170
          elif st_groups[0] == "nic":
4171
            if st_groups[1] == "count":
4172
              val = len(instance.nics)
4173
            elif st_groups[1] == "macs":
4174
              val = [nic.mac for nic in instance.nics]
4175
            elif st_groups[1] == "ips":
4176
              val = [nic.ip for nic in instance.nics]
4177
            elif st_groups[1] == "modes":
4178
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4179
            elif st_groups[1] == "links":
4180
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4181
            elif st_groups[1] == "bridges":
4182
              val = []
4183
              for nicp in i_nicp:
4184
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4185
                  val.append(nicp[constants.NIC_LINK])
4186
                else:
4187
                  val.append(None)
4188
            else:
4189
              # index-based item
4190
              nic_idx = int(st_groups[2])
4191
              if nic_idx >= len(instance.nics):
4192
                val = None
4193
              else:
4194
                if st_groups[1] == "mac":
4195
                  val = instance.nics[nic_idx].mac
4196
                elif st_groups[1] == "ip":
4197
                  val = instance.nics[nic_idx].ip
4198
                elif st_groups[1] == "mode":
4199
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4200
                elif st_groups[1] == "link":
4201
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4202
                elif st_groups[1] == "bridge":
4203
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4204
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4205
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4206
                  else:
4207
                    val = None
4208
                else:
4209
                  assert False, "Unhandled NIC parameter"
4210
          else:
4211
            assert False, ("Declared but unhandled variable parameter '%s'" %
4212
                           field)
4213
        else:
4214
          assert False, "Declared but unhandled parameter '%s'" % field
4215
        iout.append(val)
4216
      output.append(iout)
4217

    
4218
    return output


class LUFailoverInstance(LogicalUnit):
4222
  """Failover an instance.
4223

4224
  """
4225
  HPATH = "instance-failover"
4226
  HTYPE = constants.HTYPE_INSTANCE
4227
  _OP_REQP = ["instance_name", "ignore_consistency"]
4228
  REQ_BGL = False
4229

    
4230
  def ExpandNames(self):
4231
    self._ExpandAndLockInstance()
4232
    self.needed_locks[locking.LEVEL_NODE] = []
4233
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4234

    
4235
  def DeclareLocks(self, level):
4236
    if level == locking.LEVEL_NODE:
4237
      self._LockInstancesNodes()
4238

    
4239
  def BuildHooksEnv(self):
4240
    """Build hooks env.
4241

4242
    This runs on master, primary and secondary nodes of the instance.
4243

4244
    """
4245
    env = {
4246
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4247
      }
4248
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4249
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4250
    return env, nl, nl
4251

    
4252
  def CheckPrereq(self):
4253
    """Check prerequisites.
4254

4255
    This checks that the instance is in the cluster.
4256

4257
    """
4258
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4259
    assert self.instance is not None, \
4260
      "Cannot retrieve locked instance %s" % self.op.instance_name
4261

    
4262
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4263
    if instance.disk_template not in constants.DTS_NET_MIRROR:
4264
      raise errors.OpPrereqError("Instance's disk layout is not"
4265
                                 " network mirrored, cannot failover.")
4266

    
4267
    secondary_nodes = instance.secondary_nodes
4268
    if not secondary_nodes:
4269
      raise errors.ProgrammerError("no secondary node but using "
4270
                                   "a mirrored disk template")
4271

    
4272
    target_node = secondary_nodes[0]
4273
    _CheckNodeOnline(self, target_node)
4274
    _CheckNodeNotDrained(self, target_node)
4275
    if instance.admin_up:
4276
      # check memory requirements on the secondary node
4277
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
4278
                           instance.name, bep[constants.BE_MEMORY],
4279
                           instance.hypervisor)
4280
    else:
4281
      self.LogInfo("Not checking memory on the secondary node as"
4282
                   " instance will not be started")
4283

    
4284
    # check bridge existence
4285
    _CheckInstanceBridgesExist(self, instance, node=target_node)
4286

    
4287
  def Exec(self, feedback_fn):
4288
    """Failover an instance.
4289

4290
    The failover is done by shutting it down on its present node and
4291
    starting it on the secondary.
4292

4293
    """
4294
    instance = self.instance
4295

    
4296
    source_node = instance.primary_node
4297
    target_node = instance.secondary_nodes[0]
4298

    
4299
    feedback_fn("* checking disk consistency between source and target")
4300
    for dev in instance.disks:
4301
      # for drbd, these are drbd over lvm
4302
      if not _CheckDiskConsistency(self, dev, target_node, False):
4303
        if instance.admin_up and not self.op.ignore_consistency:
4304
          raise errors.OpExecError("Disk %s is degraded on target node,"
4305
                                   " aborting failover." % dev.iv_name)
4306

    
4307
    feedback_fn("* shutting down instance on source node")
4308
    logging.info("Shutting down instance %s on node %s",
4309
                 instance.name, source_node)
4310

    
4311
    result = self.rpc.call_instance_shutdown(source_node, instance)
4312
    msg = result.fail_msg
4313
    if msg:
4314
      if self.op.ignore_consistency:
4315
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
4316
                             " Proceeding anyway. Please make sure node"
4317
                             " %s is down. Error details: %s",
4318
                             instance.name, source_node, source_node, msg)
4319
      else:
4320
        raise errors.OpExecError("Could not shutdown instance %s on"
4321
                                 " node %s: %s" %
4322
                                 (instance.name, source_node, msg))
4323

    
4324
    feedback_fn("* deactivating the instance's disks on source node")
4325
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
4326
      raise errors.OpExecError("Can't shut down the instance's disks.")
4327

    
4328
    instance.primary_node = target_node
4329
    # distribute new instance config to the other nodes
4330
    self.cfg.Update(instance)
4331

    
4332
    # Only start the instance if it's marked as up
4333
    if instance.admin_up:
4334
      feedback_fn("* activating the instance's disks on target node")
4335
      logging.info("Starting instance %s on node %s",
4336
                   instance.name, target_node)
4337

    
4338
      disks_ok, _ = _AssembleInstanceDisks(self, instance,
4339
                                               ignore_secondaries=True)
4340
      if not disks_ok:
4341
        _ShutdownInstanceDisks(self, instance)
4342
        raise errors.OpExecError("Can't activate the instance's disks")
4343

    
4344
      feedback_fn("* starting the instance on the target node")
4345
      result = self.rpc.call_instance_start(target_node, instance, None, None)
4346
      msg = result.fail_msg
4347
      if msg:
4348
        _ShutdownInstanceDisks(self, instance)
4349
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
4350
                                 (instance.name, target_node, msg))
4351

    
4352

    
4353
class LUMigrateInstance(LogicalUnit):
4354
  """Migrate an instance.
4355

4356
  This is migration without shutting down, compared to the failover,
4357
  which is done with shutdown.
4358

4359
  """
4360
  HPATH = "instance-migrate"
4361
  HTYPE = constants.HTYPE_INSTANCE
4362
  _OP_REQP = ["instance_name", "live", "cleanup"]
4363

    
4364
  REQ_BGL = False
4365

    
4366
  def ExpandNames(self):
4367
    self._ExpandAndLockInstance()
4368

    
4369
    self.needed_locks[locking.LEVEL_NODE] = []
4370
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4371

    
4372
    self._migrater = TLMigrateInstance(self, self.op.instance_name,
4373
                                       self.op.live, self.op.cleanup)
4374
    self.tasklets = [self._migrater]
4375

    
4376
  def DeclareLocks(self, level):
4377
    if level == locking.LEVEL_NODE:
4378
      self._LockInstancesNodes()
4379

    
4380
  def BuildHooksEnv(self):
4381
    """Build hooks env.
4382

4383
    This runs on master, primary and secondary nodes of the instance.
4384

4385
    """
4386
    instance = self._migrater.instance
4387
    env = _BuildInstanceHookEnvByObject(self, instance)
4388
    env["MIGRATE_LIVE"] = self.op.live
4389
    env["MIGRATE_CLEANUP"] = self.op.cleanup
4390
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
4391
    return env, nl, nl
4392

    
4393

    
4394
class LUMoveInstance(LogicalUnit):
4395
  """Move an instance by data-copying.
4396

4397
  """
4398
  HPATH = "instance-move"
4399
  HTYPE = constants.HTYPE_INSTANCE
4400
  _OP_REQP = ["instance_name", "target_node"]
4401
  REQ_BGL = False
4402

    
4403
  def ExpandNames(self):
4404
    self._ExpandAndLockInstance()
4405
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
4406
    if target_node is None:
4407
      raise errors.OpPrereqError("Node '%s' not known" %
4408
                                  self.op.target_node)
4409
    self.op.target_node = target_node
4410
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
4411
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
4412

    
4413
  def DeclareLocks(self, level):
4414
    if level == locking.LEVEL_NODE:
4415
      self._LockInstancesNodes(primary_only=True)
4416

    
4417
  def BuildHooksEnv(self):
4418
    """Build hooks env.
4419

4420
    This runs on master, primary and secondary nodes of the instance.
4421

4422
    """
4423
    env = {
4424
      "TARGET_NODE": self.op.target_node,
4425
      }
4426
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4427
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
4428
                                       self.op.target_node]
4429
    return env, nl, nl
4430

    
4431
  def CheckPrereq(self):
4432
    """Check prerequisites.
4433

4434
    This checks that the instance is in the cluster.
4435

4436
    """
4437
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4438
    assert self.instance is not None, \
4439
      "Cannot retrieve locked instance %s" % self.op.instance_name
4440

    
4441
    node = self.cfg.GetNodeInfo(self.op.target_node)
4442
    assert node is not None, \
4443
      "Cannot retrieve locked node %s" % self.op.target_node
4444

    
4445
    self.target_node = target_node = node.name
4446

    
4447
    if target_node == instance.primary_node:
4448
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
4449
                                 (instance.name, target_node))
4450

    
4451
    bep = self.cfg.GetClusterInfo().FillBE(instance)
4452

    
4453
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existance
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
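      # each node's payload is a (done, sync_percent) pair; keep polling until
      # every node reports done, showing the slowest node's progress meanwhile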
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
4834
                    (target_node, abort_msg))
4835
      # Don't raise an exception here, as we stil have to try to revert the
4836
      # disk status, even if this step failed.
4837

    
4838
  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    time.sleep(10)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))
    time.sleep(10)

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

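    # note: these are the nodes' secondary IPs (the addresses DRBD replicates
    # over); the attach/disconnect RPCs and the migration target above use them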
    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be change to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passes to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
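  # the second LV is a fixed 128 MB volume holding the DRBD metadata for this
  # disk (the same 128 MB per disk that _ComputeDiskSize accounts for below)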
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

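    # the allocated minors come back one per requested node, so the list is
    # laid out as [p_minor_disk0, s_minor_disk0, p_minor_disk1, ...]; hence
    # the minors[idx*2] / minors[idx*2+1] indexing further down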
    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
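# Illustrative example (assumed values, not executed): for DT_PLAIN with
# disk_info=[{"size": 1024, "mode": constants.DISK_RDWR}] and base_index=0,
# this returns a single LD_LV disk named "<unique-id>.disk0" in the cluster
# volume group, with iv_name "disk/0".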


def _GetInstanceInfoText(instance):
  """Compute that text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
5162
                 " node %s: %s" % (file_storage_dir, pnode))
5163

    
5164
  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
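      # force creation (and opening) only on the primary node; on secondary
      # nodes _CreateBlockDev still creates whatever CreateOnSecondary()
      # requires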
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, instance.primary_node, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
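# Example (illustrative): for two 1024 MB disks, DT_PLAIN needs 2048 MB of
# free space in the volume group, while DT_DRBD8 needs 2304 MB because of the
# extra 128 MB of DRBD metadata per disk.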
5244

    
5245

    
5246
def _CheckHVParams(lu, nodenames, hvname, hvparams):
5247
  """Hypervisor parameter validation.
5248

5249
  This function abstract the hypervisor parameter validation to be
5250
  used in both instance create and instance modify.
5251

5252
  @type lu: L{LogicalUnit}
5253
  @param lu: the logical unit for which we check
5254
  @type nodenames: list
5255
  @param nodenames: the list of nodes on which we should check
5256
  @type hvname: string
5257
  @param hvname: the name of the hypervisor we should use
5258
  @type hvparams: dict
5259
  @param hvparams: the parameters which we need to check
5260
  @raise errors.OpPrereqError: if the parameters are not valid
5261

5262
  """
5263
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
5264
                                                  hvname,
5265
                                                  hvparams)
5266
  for node in nodenames:
5267
    info = hvinfo[node]
5268
    if info.offline:
5269
      continue
5270
    info.Raise("Hypervisor parameter validation failed on node %s" % node)
5271

    
5272

    
5273
class LUCreateInstance(LogicalUnit):
5274
  """Create an instance.
5275

5276
  """
5277
  HPATH = "instance-add"
5278
  HTYPE = constants.HTYPE_INSTANCE
5279
  _OP_REQP = ["instance_name", "disks", "disk_template",
5280
              "mode", "start",
5281
              "wait_for_sync", "ip_check", "nics",
5282
              "hvparams", "beparams"]
5283
  REQ_BGL = False
5284

    
5285
  def _ExpandNode(self, node):
5286
    """Expands and checks one node name.
5287

5288
    """
5289
    node_full = self.cfg.ExpandNodeName(node)
5290
    if node_full is None:
5291
      raise errors.OpPrereqError("Unknown node %s" % node)
5292
    return node_full
5293

    
5294
  def ExpandNames(self):
5295
    """ExpandNames for CreateInstance.
5296

5297
    Figure out the right locks for instance creation.
5298

5299
    """
5300
    self.needed_locks = {}
5301

    
5302
    # set optional parameters to none if they don't exist
5303
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
5304
      if not hasattr(self.op, attr):
5305
        setattr(self.op, attr, None)
5306

    
5307
    # cheap checks, mostly valid constants given
5308

    
5309
    # verify creation mode
5310
    if self.op.mode not in (constants.INSTANCE_CREATE,
5311
                            constants.INSTANCE_IMPORT):
5312
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
5313
                                 self.op.mode)
5314

    
5315
    # disk template and mirror node verification
5316
    if self.op.disk_template not in constants.DISK_TEMPLATES:
5317
      raise errors.OpPrereqError("Invalid disk template name")
5318

    
5319
    if self.op.hypervisor is None:
5320
      self.op.hypervisor = self.cfg.GetHypervisorType()
5321

    
5322
    cluster = self.cfg.GetClusterInfo()
5323
    enabled_hvs = cluster.enabled_hypervisors
5324
    if self.op.hypervisor not in enabled_hvs:
5325
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
5326
                                 " cluster (%s)" % (self.op.hypervisor,
5327
                                  ",".join(enabled_hvs)))
5328

    
5329
    # check hypervisor parameter syntax (locally)
5330
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5331
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
5332
                                  self.op.hvparams)
5333
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
5334
    hv_type.CheckParameterSyntax(filled_hvp)
5335
    self.hv_full = filled_hvp
5336

    
5337
    # fill and remember the beparams dict
5338
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5339
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
5340
                                    self.op.beparams)
5341

    
5342
    #### instance parameters check
5343

    
5344
    # instance name verification
5345
    hostname1 = utils.HostInfo(self.op.instance_name)
5346
    self.op.instance_name = instance_name = hostname1.name
5347

    
5348
    # this is just a preventive check, but someone might still add this
5349
    # instance in the meantime, and creation will fail at lock-add time
5350
    if instance_name in self.cfg.GetInstanceList():
5351
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5352
                                 instance_name)
5353

    
5354
    self.add_locks[locking.LEVEL_INSTANCE] = instance_name
5355

    
5356
    # NIC buildup
5357
    self.nics = []
5358
    for idx, nic in enumerate(self.op.nics):
5359
      nic_mode_req = nic.get("mode", None)
5360
      nic_mode = nic_mode_req
5361
      if nic_mode is None:
5362
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
5363

    
5364
      # in routed mode, for the first nic, the default ip is 'auto'
5365
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
5366
        default_ip_mode = constants.VALUE_AUTO
5367
      else:
5368
        default_ip_mode = constants.VALUE_NONE
5369

    
5370
      # ip validity checks
5371
      ip = nic.get("ip", default_ip_mode)
5372
      if ip is None or ip.lower() == constants.VALUE_NONE:
5373
        nic_ip = None
5374
      elif ip.lower() == constants.VALUE_AUTO:
5375
        nic_ip = hostname1.ip
5376
      else:
5377
        if not utils.IsValidIP(ip):
5378
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
5379
                                     " like a valid IP" % ip)
5380
        nic_ip = ip
5381

    
5382
      # TODO: check the ip for uniqueness !!
5383
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
5384
        raise errors.OpPrereqError("Routed nic mode requires an ip address")
5385

    
5386
      # MAC address verification
5387
      mac = nic.get("mac", constants.VALUE_AUTO)
5388
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5389
        if not utils.IsValidMac(mac.lower()):
5390
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
5391
                                     mac)
5392
      # bridge verification
5393
      bridge = nic.get("bridge", None)
5394
      link = nic.get("link", None)
5395
      if bridge and link:
5396
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
5397
                                   " at the same time")
5398
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
5399
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
5400
      elif bridge:
5401
        link = bridge
5402

    
5403
      nicparams = {}
5404
      if nic_mode_req:
5405
        nicparams[constants.NIC_MODE] = nic_mode_req
5406
      if link:
5407
        nicparams[constants.NIC_LINK] = link
5408

    
5409
      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
5410
                                      nicparams)
5411
      objects.NIC.CheckParameterSyntax(check_params)
5412
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
5413

    
5414
    # disk checks/pre-build
5415
    self.disks = []
5416
    for disk in self.op.disks:
5417
      mode = disk.get("mode", constants.DISK_RDWR)
5418
      if mode not in constants.DISK_ACCESS_SET:
5419
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
5420
                                   mode)
5421
      size = disk.get("size", None)
5422
      if size is None:
5423
        raise errors.OpPrereqError("Missing disk size")
5424
      try:
5425
        size = int(size)
5426
      except ValueError:
5427
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
5428
      self.disks.append({"size": size, "mode": mode})
5429

    
5430
    # used in CheckPrereq for ip ping check
5431
    self.check_ip = hostname1.ip
5432

    
5433
    # file storage checks
5434
    if (self.op.file_driver and
5435
        not self.op.file_driver in constants.FILE_DRIVER):
5436
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
5437
                                 self.op.file_driver)
5438

    
5439
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
5440
      raise errors.OpPrereqError("File storage directory path not absolute")
5441

    
5442
    ### Node/iallocator related checks
5443
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
5444
      raise errors.OpPrereqError("One and only one of iallocator and primary"
5445
                                 " node must be given")
5446

    
5447
    if self.op.iallocator:
5448
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5449
    else:
5450
      self.op.pnode = self._ExpandNode(self.op.pnode)
5451
      nodelist = [self.op.pnode]
5452
      if self.op.snode is not None:
5453
        self.op.snode = self._ExpandNode(self.op.snode)
5454
        nodelist.append(self.op.snode)
5455
      self.needed_locks[locking.LEVEL_NODE] = nodelist
5456

    
5457
    # in case of import lock the source node too
5458
    if self.op.mode == constants.INSTANCE_IMPORT:
5459
      src_node = getattr(self.op, "src_node", None)
5460
      src_path = getattr(self.op, "src_path", None)
5461

    
5462
      if src_path is None:
5463
        self.op.src_path = src_path = self.op.instance_name
5464

    
5465
      if src_node is None:
5466
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5467
        self.op.src_node = None
5468
        if os.path.isabs(src_path):
5469
          raise errors.OpPrereqError("Importing an instance from an absolute"
5470
                                     " path requires a source node option.")
5471
      else:
5472
        self.op.src_node = src_node = self._ExpandNode(src_node)
5473
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5474
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
5475
        if not os.path.isabs(src_path):
5476
          self.op.src_path = src_path = \
5477
            os.path.join(constants.EXPORT_DIR, src_path)
5478

    
5479
    else: # INSTANCE_CREATE
5480
      if getattr(self.op, "os_type", None) is None:
5481
        raise errors.OpPrereqError("No guest OS specified")
5482

    
5483
  def _RunAllocator(self):
5484
    """Run the allocator based on input opcode.
5485

5486
    """
5487
    nics = [n.ToDict() for n in self.nics]
5488
    ial = IAllocator(self.cfg, self.rpc,
5489
                     mode=constants.IALLOCATOR_MODE_ALLOC,
5490
                     name=self.op.instance_name,
5491
                     disk_template=self.op.disk_template,
5492
                     tags=[],
5493
                     os=self.op.os_type,
5494
                     vcpus=self.be_full[constants.BE_VCPUS],
5495
                     mem_size=self.be_full[constants.BE_MEMORY],
5496
                     disks=self.disks,
5497
                     nics=nics,
5498
                     hypervisor=self.op.hypervisor,
5499
                     )
5500

    
5501
    ial.Run(self.op.iallocator)
5502

    
5503
    if not ial.success:
5504
      raise errors.OpPrereqError("Can't compute nodes using"
5505
                                 " iallocator '%s': %s" % (self.op.iallocator,
5506
                                                           ial.info))
5507
    if len(ial.nodes) != ial.required_nodes:
5508
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
5509
                                 " of nodes (%s), required %s" %
5510
                                 (self.op.iallocator, len(ial.nodes),
5511
                                  ial.required_nodes))
5512
    self.op.pnode = ial.nodes[0]
5513
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
5514
                 self.op.instance_name, self.op.iallocator,
5515
                 ", ".join(ial.nodes))
5516
    if ial.required_nodes == 2:
5517
      self.op.snode = ial.nodes[1]
5518

    
5519
  def BuildHooksEnv(self):
5520
    """Build hooks env.
5521

5522
    This runs on master, primary and secondary nodes of the instance.
5523

5524
    """
5525
    env = {
5526
      "ADD_MODE": self.op.mode,
5527
      }
5528
    if self.op.mode == constants.INSTANCE_IMPORT:
5529
      env["SRC_NODE"] = self.op.src_node
5530
      env["SRC_PATH"] = self.op.src_path
5531
      env["SRC_IMAGES"] = self.src_images
5532

    
5533
    env.update(_BuildInstanceHookEnv(
5534
      name=self.op.instance_name,
5535
      primary_node=self.op.pnode,
5536
      secondary_nodes=self.secondaries,
5537
      status=self.op.start,
5538
      os_type=self.op.os_type,
5539
      memory=self.be_full[constants.BE_MEMORY],
5540
      vcpus=self.be_full[constants.BE_VCPUS],
5541
      nics=_NICListToTuple(self, self.nics),
5542
      disk_template=self.op.disk_template,
5543
      disks=[(d["size"], d["mode"]) for d in self.disks],
5544
      bep=self.be_full,
5545
      hvp=self.hv_full,
5546
      hypervisor_name=self.op.hypervisor,
5547
    ))
5548

    
5549
    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
5550
          self.secondaries)
5551
    return env, nl, nl
5552

    
5553

    
5554
  def CheckPrereq(self):
5555
    """Check prerequisites.
5556

5557
    """
5558
    if (not self.cfg.GetVGName() and
5559
        self.op.disk_template not in constants.DTS_NOT_LVM):
5560
      raise errors.OpPrereqError("Cluster does not support lvm-based"
5561
                                 " instances")
5562

    
5563
    if self.op.mode == constants.INSTANCE_IMPORT:
5564
      src_node = self.op.src_node
5565
      src_path = self.op.src_path
5566

    
5567
      if src_node is None:
5568
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
5569
        exp_list = self.rpc.call_export_list(locked_nodes)
5570
        found = False
5571
        for node in exp_list:
5572
          if exp_list[node].fail_msg:
5573
            continue
5574
          if src_path in exp_list[node].payload:
5575
            found = True
5576
            self.op.src_node = src_node = node
5577
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
5578
                                                       src_path)
5579
            break
5580
        if not found:
5581
          raise errors.OpPrereqError("No export found for relative path %s" %
5582
                                      src_path)
5583

    
5584
      _CheckNodeOnline(self, src_node)
5585
      result = self.rpc.call_export_info(src_node, src_path)
5586
      result.Raise("No export or invalid export found in dir %s" % src_path)
5587

    
5588
      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
5589
      if not export_info.has_section(constants.INISECT_EXP):
5590
        raise errors.ProgrammerError("Corrupted export config")
5591

    
5592
      ei_version = export_info.get(constants.INISECT_EXP, 'version')
5593
      if (int(ei_version) != constants.EXPORT_VERSION):
5594
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
5595
                                   (ei_version, constants.EXPORT_VERSION))
5596

    
5597
      # Check that the new instance doesn't have less disks than the export
5598
      instance_disks = len(self.disks)
5599
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
5600
      if instance_disks < export_disks:
5601
        raise errors.OpPrereqError("Not enough disks to import."
5602
                                   " (instance: %d, export: %d)" %
5603
                                   (instance_disks, export_disks))
5604

    
5605
      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
5606
      disk_images = []
5607
      for idx in range(export_disks):
5608
        option = 'disk%d_dump' % idx
5609
        if export_info.has_option(constants.INISECT_INS, option):
5610
          # FIXME: are the old os-es, disk sizes, etc. useful?
5611
          export_name = export_info.get(constants.INISECT_INS, option)
5612
          image = os.path.join(src_path, export_name)
5613
          disk_images.append(image)
5614
        else:
5615
          disk_images.append(False)
5616

    
5617
      self.src_images = disk_images
5618

    
5619
      old_name = export_info.get(constants.INISECT_INS, 'name')
5620
      # FIXME: int() here could throw a ValueError on broken exports
5621
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
5622
      if self.op.instance_name == old_name:
5623
        for idx, nic in enumerate(self.nics):
5624
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
5625
            nic_mac_ini = 'nic%d_mac' % idx
5626
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
5627

    
5628
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
5629
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
5630
    if self.op.start and not self.op.ip_check:
5631
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5632
                                 " adding an instance in start mode")
5633

    
5634
    if self.op.ip_check:
5635
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5636
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5637
                                   (self.check_ip, self.op.instance_name))
5638

    
5639
    #### mac address generation
5640
    # By generating here the mac address both the allocator and the hooks get
5641
    # the real final mac address rather than the 'auto' or 'generate' value.
5642
    # There is a race condition between the generation and the instance object
5643
    # creation, which means that we know the mac is valid now, but we're not
5644
    # sure it will be when we actually add the instance. If things go bad
5645
    # adding the instance will abort because of a duplicate mac, and the
5646
    # creation job will fail.
5647
    for nic in self.nics:
5648
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5649
        nic.mac = self.cfg.GenerateMAC()
5650

    
5651
    #### allocator run
5652

    
5653
    if self.op.iallocator is not None:
5654
      self._RunAllocator()
5655

    
5656
    #### node related checks
5657

    
5658
    # check primary node
5659
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
5660
    assert self.pnode is not None, \
5661
      "Cannot retrieve locked node %s" % self.op.pnode
5662
    if pnode.offline:
5663
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
5664
                                 pnode.name)
5665
    if pnode.drained:
5666
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
5667
                                 pnode.name)
5668

    
5669
    self.secondaries = []
5670

    
5671
    # mirror node verification
5672
    if self.op.disk_template in constants.DTS_NET_MIRROR:
5673
      if self.op.snode is None:
5674
        raise errors.OpPrereqError("The networked disk templates need"
5675
                                   " a mirror node")
5676
      if self.op.snode == pnode.name:
5677
        raise errors.OpPrereqError("The secondary node cannot be"
5678
                                   " the primary node.")
5679
      _CheckNodeOnline(self, self.op.snode)
5680
      _CheckNodeNotDrained(self, self.op.snode)
5681
      self.secondaries.append(self.op.snode)
5682

    
5683
    nodenames = [pnode.name] + self.secondaries
5684

    
5685
    req_size = _ComputeDiskSize(self.op.disk_template,
5686
                                self.disks)
5687

    
5688
    # Check lv size requirements
5689
    if req_size is not None:
5690
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5691
                                         self.op.hypervisor)
5692
      for node in nodenames:
5693
        info = nodeinfo[node]
5694
        info.Raise("Cannot get current information from node %s" % node)
5695
        info = info.payload
5696
        vg_free = info.get('vg_free', None)
5697
        if not isinstance(vg_free, int):
5698
          raise errors.OpPrereqError("Can't compute free disk space on"
5699
                                     " node %s" % node)
5700
        if req_size > vg_free:
5701
          raise errors.OpPrereqError("Not enough disk space on target node %s."
5702
                                     " %d MB available, %d MB required" %
5703
                                     (node, vg_free, req_size))
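    # Worked example (numbers are made up): for two requested disks of
    # 10240 MB each, req_size is roughly the summed size for the chosen
    # template (network-mirrored templates add per-disk metadata overhead
    # on top).  A node reporting vg_free = 15000 MB against a req_size of
    # 20480 MB would therefore fail the check above.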
5704

    
5705
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
5706

    
5707
    # os verification
5708
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
5709
    result.Raise("OS '%s' not in supported os list for primary node %s" %
5710
                 (self.op.os_type, pnode.name), prereq=True)
5711

    
5712
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
5713

    
5714
    # memory check on primary node
5715
    if self.op.start:
5716
      _CheckNodeFreeMemory(self, self.pnode.name,
5717
                           "creating instance %s" % self.op.instance_name,
5718
                           self.be_full[constants.BE_MEMORY],
5719
                           self.op.hypervisor)
5720

    
5721
    self.dry_run_result = list(nodenames)
5722

    
5723
  def Exec(self, feedback_fn):
5724
    """Create and add the instance to the cluster.
5725

5726
    """
5727
    instance = self.op.instance_name
5728
    pnode_name = self.pnode.name
5729

    
5730
    ht_kind = self.op.hypervisor
5731
    if ht_kind in constants.HTS_REQ_PORT:
5732
      network_port = self.cfg.AllocatePort()
5733
    else:
5734
      network_port = None
5735

    
5736
    ##if self.op.vnc_bind_address is None:
5737
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
5738

    
5739
    # this is needed because os.path.join does not accept None arguments
5740
    if self.op.file_storage_dir is None:
5741
      string_file_storage_dir = ""
5742
    else:
5743
      string_file_storage_dir = self.op.file_storage_dir
5744

    
5745
    # build the full file storage dir path
5746
    file_storage_dir = os.path.normpath(os.path.join(
5747
                                        self.cfg.GetFileStorageDir(),
5748
                                        string_file_storage_dir, instance))
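    # Worked example (paths are hypothetical): with a cluster file storage
    # directory of "/srv/ganeti/file-storage", an empty op.file_storage_dir
    # and an instance named "instance1.example.com", this evaluates to:
    #
    #   os.path.normpath(os.path.join("/srv/ganeti/file-storage", "",
    #                                 "instance1.example.com"))
    #   -> "/srv/ganeti/file-storage/instance1.example.com"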
5749

    
5750

    
5751
    disks = _GenerateDiskTemplate(self,
5752
                                  self.op.disk_template,
5753
                                  instance, pnode_name,
5754
                                  self.secondaries,
5755
                                  self.disks,
5756
                                  file_storage_dir,
5757
                                  self.op.file_driver,
5758
                                  0)
5759

    
5760
    iobj = objects.Instance(name=instance, os=self.op.os_type,
5761
                            primary_node=pnode_name,
5762
                            nics=self.nics, disks=disks,
5763
                            disk_template=self.op.disk_template,
5764
                            admin_up=False,
5765
                            network_port=network_port,
5766
                            beparams=self.op.beparams,
5767
                            hvparams=self.op.hvparams,
5768
                            hypervisor=self.op.hypervisor,
5769
                            )
5770

    
5771
    feedback_fn("* creating instance disks...")
5772
    try:
5773
      _CreateDisks(self, iobj)
5774
    except errors.OpExecError:
5775
      self.LogWarning("Device creation failed, reverting...")
5776
      try:
5777
        _RemoveDisks(self, iobj)
5778
      finally:
5779
        self.cfg.ReleaseDRBDMinors(instance)
5780
        raise
5781

    
5782
    feedback_fn("adding instance %s to cluster config" % instance)
5783

    
5784
    self.cfg.AddInstance(iobj)
5785
    # Declare that we don't want to remove the instance lock anymore, as we've
5786
    # added the instance to the config
5787
    del self.remove_locks[locking.LEVEL_INSTANCE]
5788
    # Unlock all the nodes
5789
    if self.op.mode == constants.INSTANCE_IMPORT:
5790
      nodes_keep = [self.op.src_node]
5791
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5792
                       if node != self.op.src_node]
5793
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
5794
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5795
    else:
5796
      self.context.glm.release(locking.LEVEL_NODE)
5797
      del self.acquired_locks[locking.LEVEL_NODE]
5798

    
5799
    if self.op.wait_for_sync:
5800
      disk_abort = not _WaitForSync(self, iobj)
5801
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
5802
      # make sure the disks are not degraded (still sync-ing is ok)
5803
      time.sleep(15)
5804
      feedback_fn("* checking mirrors status")
5805
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
5806
    else:
5807
      disk_abort = False
5808

    
5809
    if disk_abort:
5810
      _RemoveDisks(self, iobj)
5811
      self.cfg.RemoveInstance(iobj.name)
5812
      # Make sure the instance lock gets removed
5813
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5814
      raise errors.OpExecError("There are some degraded disks for"
5815
                               " this instance")
5816

    
5817
    feedback_fn("creating os for instance %s on node %s" %
5818
                (instance, pnode_name))
5819

    
5820
    if iobj.disk_template != constants.DT_DISKLESS:
5821
      if self.op.mode == constants.INSTANCE_CREATE:
5822
        feedback_fn("* running the instance OS create scripts...")
5823
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
5824
        result.Raise("Could not add os for instance %s"
5825
                     " on node %s" % (instance, pnode_name))
5826

    
5827
      elif self.op.mode == constants.INSTANCE_IMPORT:
5828
        feedback_fn("* running the instance OS import scripts...")
5829
        src_node = self.op.src_node
5830
        src_images = self.src_images
5831
        cluster_name = self.cfg.GetClusterName()
5832
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5833
                                                         src_node, src_images,
5834
                                                         cluster_name)
5835
        msg = import_result.fail_msg
5836
        if msg:
5837
          self.LogWarning("Error while importing the disk images for instance"
5838
                          " %s on node %s: %s" % (instance, pnode_name, msg))
5839
      else:
5840
        # also checked in the prereq part
5841
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5842
                                     % self.op.mode)
5843

    
5844
    if self.op.start:
5845
      iobj.admin_up = True
5846
      self.cfg.Update(iobj)
5847
      logging.info("Starting instance %s on node %s", instance, pnode_name)
5848
      feedback_fn("* starting instance...")
5849
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5850
      result.Raise("Could not start instance")
5851

    
5852
    return list(iobj.all_nodes)
5853

    
5854

    
5855
class LUConnectConsole(NoHooksLU):
5856
  """Connect to an instance's console.
5857

5858
  This is somewhat special in that it returns the command line that
5859
  you need to run on the master node in order to connect to the
5860
  console.
5861

5862
  """
5863
  _OP_REQP = ["instance_name"]
5864
  REQ_BGL = False
5865

    
5866
  def ExpandNames(self):
5867
    self._ExpandAndLockInstance()
5868

    
5869
  def CheckPrereq(self):
5870
    """Check prerequisites.
5871

5872
    This checks that the instance is in the cluster.
5873

5874
    """
5875
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5876
    assert self.instance is not None, \
5877
      "Cannot retrieve locked instance %s" % self.op.instance_name
5878
    _CheckNodeOnline(self, self.instance.primary_node)
5879

    
5880
  def Exec(self, feedback_fn):
5881
    """Connect to the console of an instance
5882

5883
    """
5884
    instance = self.instance
5885
    node = instance.primary_node
5886

    
5887
    node_insts = self.rpc.call_instance_list([node],
5888
                                             [instance.hypervisor])[node]
5889
    node_insts.Raise("Can't get node information from %s" % node)
5890

    
5891
    if instance.name not in node_insts.payload:
5892
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5893

    
5894
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5895

    
5896
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5897
    cluster = self.cfg.GetClusterInfo()
5898
    # beparams and hvparams are passed separately, to avoid editing the
5899
    # instance and then saving the defaults in the instance itself.
5900
    hvparams = cluster.FillHV(instance)
5901
    beparams = cluster.FillBE(instance)
5902
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5903

    
5904
    # build ssh cmdline
5905
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
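    # Illustrative only: for a Xen-based instance the hypervisor-specific
    # console command is typically along the lines of
    # "xm console instance1.example.com", and BuildCmd wraps it into an ssh
    # command line against the primary node, roughly equivalent to:
    #
    #   ssh -t root@node1.example.com 'xm console instance1.example.com'
    #
    # The exact ssh options depend on the cluster's SshRunner configuration.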
5906

    
5907

    
5908
class LUReplaceDisks(LogicalUnit):
5909
  """Replace the disks of an instance.
5910

5911
  """
5912
  HPATH = "mirrors-replace"
5913
  HTYPE = constants.HTYPE_INSTANCE
5914
  _OP_REQP = ["instance_name", "mode", "disks"]
5915
  REQ_BGL = False
5916

    
5917
  def CheckArguments(self):
5918
    if not hasattr(self.op, "remote_node"):
5919
      self.op.remote_node = None
5920
    if not hasattr(self.op, "iallocator"):
5921
      self.op.iallocator = None
5922

    
5923
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
5924
                                  self.op.iallocator)
5925

    
5926
  def ExpandNames(self):
5927
    self._ExpandAndLockInstance()
5928

    
5929
    if self.op.iallocator is not None:
5930
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5931

    
5932
    elif self.op.remote_node is not None:
5933
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5934
      if remote_node is None:
5935
        raise errors.OpPrereqError("Node '%s' not known" %
5936
                                   self.op.remote_node)
5937

    
5938
      self.op.remote_node = remote_node
5939

    
5940
      # Warning: do not remove the locking of the new secondary here
5941
      # unless DRBD8.AddChildren is changed to work in parallel;
5942
      # currently it doesn't since parallel invocations of
5943
      # FindUnusedMinor will conflict
5944
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5945
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5946

    
5947
    else:
5948
      self.needed_locks[locking.LEVEL_NODE] = []
5949
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5950

    
5951
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
5952
                                   self.op.iallocator, self.op.remote_node,
5953
                                   self.op.disks)
5954

    
5955
    self.tasklets = [self.replacer]
5956

    
5957
  def DeclareLocks(self, level):
5958
    # If we're not already locking all nodes in the set we have to declare the
5959
    # instance's primary/secondary nodes.
5960
    if (level == locking.LEVEL_NODE and
5961
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5962
      self._LockInstancesNodes()
5963

    
5964
  def BuildHooksEnv(self):
5965
    """Build hooks env.
5966

5967
    This runs on the master, the primary and all the secondaries.
5968

5969
    """
5970
    instance = self.replacer.instance
5971
    env = {
5972
      "MODE": self.op.mode,
5973
      "NEW_SECONDARY": self.op.remote_node,
5974
      "OLD_SECONDARY": instance.secondary_nodes[0],
5975
      }
5976
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5977
    nl = [
5978
      self.cfg.GetMasterNode(),
5979
      instance.primary_node,
5980
      ]
5981
    if self.op.remote_node is not None:
5982
      nl.append(self.op.remote_node)
5983
    return env, nl, nl
5984

    
5985

    
5986
class LUEvacuateNode(LogicalUnit):
5987
  """Relocate the secondary instances from a node.
5988

5989
  """
5990
  HPATH = "node-evacuate"
5991
  HTYPE = constants.HTYPE_NODE
5992
  _OP_REQP = ["node_name"]
5993
  REQ_BGL = False
5994

    
5995
  def CheckArguments(self):
5996
    if not hasattr(self.op, "remote_node"):
5997
      self.op.remote_node = None
5998
    if not hasattr(self.op, "iallocator"):
5999
      self.op.iallocator = None
6000

    
6001
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6002
                                  self.op.remote_node,
6003
                                  self.op.iallocator)
6004

    
6005
  def ExpandNames(self):
6006
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
6007
    if self.op.node_name is None:
6008
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
6009

    
6010
    self.needed_locks = {}
6011

    
6012
    # Declare node locks
6013
    if self.op.iallocator is not None:
6014
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6015

    
6016
    elif self.op.remote_node is not None:
6017
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
6018
      if remote_node is None:
6019
        raise errors.OpPrereqError("Node '%s' not known" %
6020
                                   self.op.remote_node)
6021

    
6022
      self.op.remote_node = remote_node
6023

    
6024
      # Warning: do not remove the locking of the new secondary here
6025
      # unless DRBD8.AddChildren is changed to work in parallel;
6026
      # currently it doesn't since parallel invocations of
6027
      # FindUnusedMinor will conflict
6028
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6029
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6030

    
6031
    else:
6032
      raise errors.OpPrereqError("Invalid parameters")
6033

    
6034
    # Create tasklets for replacing disks for all secondary instances on this
6035
    # node
6036
    names = []
6037
    tasklets = []
6038

    
6039
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6040
      logging.debug("Replacing disks for instance %s", inst.name)
6041
      names.append(inst.name)
6042

    
6043
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6044
                                self.op.iallocator, self.op.remote_node, [])
6045
      tasklets.append(replacer)
6046

    
6047
    self.tasklets = tasklets
6048
    self.instance_names = names
6049

    
6050
    # Declare instance locks
6051
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
6052

    
6053
  def DeclareLocks(self, level):
6054
    # If we're not already locking all nodes in the set we have to declare the
6055
    # instance's primary/secondary nodes.
6056
    if (level == locking.LEVEL_NODE and
6057
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6058
      self._LockInstancesNodes()
6059

    
6060
  def BuildHooksEnv(self):
6061
    """Build hooks env.
6062

6063
    This runs on the master, the primary and all the secondaries.
6064

6065
    """
6066
    env = {
6067
      "NODE_NAME": self.op.node_name,
6068
      }
6069

    
6070
    nl = [self.cfg.GetMasterNode()]
6071

    
6072
    if self.op.remote_node is not None:
6073
      env["NEW_SECONDARY"] = self.op.remote_node
6074
      nl.append(self.op.remote_node)
6075

    
6076
    return (env, nl, nl)
6077

    
6078

    
6079
class TLReplaceDisks(Tasklet):
6080
  """Replaces disks for an instance.
6081

6082
  Note: Locking is not within the scope of this class.
6083

6084
  """
6085
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6086
               disks):
6087
    """Initializes this class.
6088

6089
    """
6090
    Tasklet.__init__(self, lu)
6091

    
6092
    # Parameters
6093
    self.instance_name = instance_name
6094
    self.mode = mode
6095
    self.iallocator_name = iallocator_name
6096
    self.remote_node = remote_node
6097
    self.disks = disks
6098

    
6099
    # Runtime data
6100
    self.instance = None
6101
    self.new_node = None
6102
    self.target_node = None
6103
    self.other_node = None
6104
    self.remote_node_info = None
6105
    self.node_secondary_ip = None
6106

    
6107
  @staticmethod
6108
  def CheckArguments(mode, remote_node, iallocator):
6109
    """Helper function for users of this class.
6110

6111
    """
6112
    # check for valid parameter combination
6113
    if mode == constants.REPLACE_DISK_CHG:
6114
      if remote_node is None and iallocator is None:
6115
        raise errors.OpPrereqError("When changing the secondary either an"
6116
                                   " iallocator script must be used or the"
6117
                                   " new node given")
6118

    
6119
      if remote_node is not None and iallocator is not None:
6120
        raise errors.OpPrereqError("Give either the iallocator or the new"
6121
                                   " secondary, not both")
6122

    
6123
    elif remote_node is not None or iallocator is not None:
6124
      # Not replacing the secondary
6125
      raise errors.OpPrereqError("The iallocator and new node options can"
6126
                                 " only be used when changing the"
6127
                                 " secondary node")
6128

    
6129
  @staticmethod
6130
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6131
    """Compute a new secondary node using an IAllocator.
6132

6133
    """
6134
    ial = IAllocator(lu.cfg, lu.rpc,
6135
                     mode=constants.IALLOCATOR_MODE_RELOC,
6136
                     name=instance_name,
6137
                     relocate_from=relocate_from)
6138

    
6139
    ial.Run(iallocator_name)
6140

    
6141
    if not ial.success:
6142
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6143
                                 " %s" % (iallocator_name, ial.info))
6144

    
6145
    if len(ial.nodes) != ial.required_nodes:
6146
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6147
                                 " of nodes (%s), required %s" %
6148
                                 (iallocator_name,
                                  len(ial.nodes), ial.required_nodes))
6149

    
6150
    remote_node_name = ial.nodes[0]
6151

    
6152
    lu.LogInfo("Selected new secondary for instance '%s': %s",
6153
               instance_name, remote_node_name)
6154

    
6155
    return remote_node_name
6156

    
6157
  def _FindFaultyDisks(self, node_name):
6158
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6159
                                    node_name, True)
6160

    
6161
  def CheckPrereq(self):
6162
    """Check prerequisites.
6163

6164
    This checks that the instance is in the cluster.
6165

6166
    """
6167
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
6168
    assert self.instance is not None, \
6169
      "Cannot retrieve locked instance %s" % self.instance_name
6170

    
6171
    if self.instance.disk_template != constants.DT_DRBD8:
6172
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6173
                                 " instances")
6174

    
6175
    if len(self.instance.secondary_nodes) != 1:
6176
      raise errors.OpPrereqError("The instance has a strange layout,"
6177
                                 " expected one secondary but found %d" %
6178
                                 len(self.instance.secondary_nodes))
6179

    
6180
    secondary_node = self.instance.secondary_nodes[0]
6181

    
6182
    if self.iallocator_name is None:
6183
      remote_node = self.remote_node
6184
    else:
6185
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6186
                                       self.instance.name, secondary_node)
6187

    
6188
    if remote_node is not None:
6189
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6190
      assert self.remote_node_info is not None, \
6191
        "Cannot retrieve locked node %s" % remote_node
6192
    else:
6193
      self.remote_node_info = None
6194

    
6195
    if remote_node == self.instance.primary_node:
6196
      raise errors.OpPrereqError("The specified node is the primary node of"
6197
                                 " the instance.")
6198

    
6199
    if remote_node == secondary_node:
6200
      raise errors.OpPrereqError("The specified node is already the"
6201
                                 " secondary node of the instance.")
6202

    
6203
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6204
                                    constants.REPLACE_DISK_CHG):
6205
      raise errors.OpPrereqError("Cannot specify disks to be replaced")
6206

    
6207
    if self.mode == constants.REPLACE_DISK_AUTO:
6208
      faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
6209
      faulty_secondary = self._FindFaultyDisks(secondary_node)
6210

    
6211
      if faulty_primary and faulty_secondary:
6212
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6213
                                   " one node and can not be repaired"
6214
                                   " automatically" % self.instance_name)
6215

    
6216
      if faulty_primary:
6217
        self.disks = faulty_primary
6218
        self.target_node = self.instance.primary_node
6219
        self.other_node = secondary_node
6220
        check_nodes = [self.target_node, self.other_node]
6221
      elif faulty_secondary:
6222
        self.disks = faulty_secondary
6223
        self.target_node = secondary_node
6224
        self.other_node = self.instance.primary_node
6225
        check_nodes = [self.target_node, self.other_node]
6226
      else:
6227
        self.disks = []
6228
        check_nodes = []
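      # Summary of the automatic mode handled above:
      #   faulty disks on both nodes   -> OpPrereqError (cannot repair)
      #   faulty on the primary only   -> rebuild those disks on the primary
      #   faulty on the secondary only -> rebuild those disks on the secondary
      #   no faulty disks              -> self.disks stays empty and Exec()
      #                                   just reports that nothing needs
      #                                   replacement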
6229

    
6230
    else:
6231
      # Non-automatic modes
6232
      if self.mode == constants.REPLACE_DISK_PRI:
6233
        self.target_node = self.instance.primary_node
6234
        self.other_node = secondary_node
6235
        check_nodes = [self.target_node, self.other_node]
6236

    
6237
      elif self.mode == constants.REPLACE_DISK_SEC:
6238
        self.target_node = secondary_node
6239
        self.other_node = self.instance.primary_node
6240
        check_nodes = [self.target_node, self.other_node]
6241

    
6242
      elif self.mode == constants.REPLACE_DISK_CHG:
6243
        self.new_node = remote_node
6244
        self.other_node = self.instance.primary_node
6245
        self.target_node = secondary_node
6246
        check_nodes = [self.new_node, self.other_node]
6247

    
6248
        _CheckNodeNotDrained(self.lu, remote_node)
6249

    
6250
      else:
6251
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6252
                                     self.mode)
6253

    
6254
      # If not specified all disks should be replaced
6255
      if not self.disks:
6256
        self.disks = range(len(self.instance.disks))
6257

    
6258
    for node in check_nodes:
6259
      _CheckNodeOnline(self.lu, node)
6260

    
6261
    # Check whether disks are valid
6262
    for disk_idx in self.disks:
6263
      self.instance.FindDisk(disk_idx)
6264

    
6265
    # Get secondary node IP addresses
6266
    node_2nd_ip = {}
6267

    
6268
    for node_name in [self.target_node, self.other_node, self.new_node]:
6269
      if node_name is not None:
6270
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6271

    
6272
    self.node_secondary_ip = node_2nd_ip
6273

    
6274
  def Exec(self, feedback_fn):
6275
    """Execute disk replacement.
6276

6277
    This dispatches the disk replacement to the appropriate handler.
6278

6279
    """
6280
    if not self.disks:
6281
      feedback_fn("No disks need replacement")
6282
      return
6283

    
6284
    feedback_fn("Replacing disk(s) %s for %s" %
6285
                (", ".join([str(i) for i in self.disks]), self.instance.name))
6286

    
6287
    activate_disks = (not self.instance.admin_up)
6288

    
6289
    # Activate the instance disks if we're replacing them on a down instance
6290
    if activate_disks:
6291
      _StartInstanceDisks(self.lu, self.instance, True)
6292

    
6293
    try:
6294
      # Should we replace the secondary node?
6295
      if self.new_node is not None:
6296
        return self._ExecDrbd8Secondary()
6297
      else:
6298
        return self._ExecDrbd8DiskOnly()
6299

    
6300
    finally:
6301
      # Deactivate the instance disks if we're replacing them on a down instance
6302
      if activate_disks:
6303
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6304

    
6305
  def _CheckVolumeGroup(self, nodes):
6306
    self.lu.LogInfo("Checking volume groups")
6307

    
6308
    vgname = self.cfg.GetVGName()
6309

    
6310
    # Make sure volume group exists on all involved nodes
6311
    results = self.rpc.call_vg_list(nodes)
6312
    if not results:
6313
      raise errors.OpExecError("Can't list volume groups on the nodes")
6314

    
6315
    for node in nodes:
6316
      res = results[node]
6317
      res.Raise("Error checking node %s" % node)
6318
      if vgname not in res.payload:
6319
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
6320
                                 (vgname, node))
6321

    
6322
  def _CheckDisksExistence(self, nodes):
6323
    # Check disk existence
6324
    for idx, dev in enumerate(self.instance.disks):
6325
      if idx not in self.disks:
6326
        continue
6327

    
6328
      for node in nodes:
6329
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6330
        self.cfg.SetDiskID(dev, node)
6331

    
6332
        result = self.rpc.call_blockdev_find(node, dev)
6333

    
6334
        msg = result.fail_msg
6335
        if msg or not result.payload:
6336
          if not msg:
6337
            msg = "disk not found"
6338
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6339
                                   (idx, node, msg))
6340

    
6341
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6342
    for idx, dev in enumerate(self.instance.disks):
6343
      if idx not in self.disks:
6344
        continue
6345

    
6346
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6347
                      (idx, node_name))
6348

    
6349
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6350
                                   ldisk=ldisk):
6351
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6352
                                 " replace disks for instance %s" %
6353
                                 (node_name, self.instance.name))
6354

    
6355
  def _CreateNewStorage(self, node_name):
6356
    vgname = self.cfg.GetVGName()
6357
    iv_names = {}
6358

    
6359
    for idx, dev in enumerate(self.instance.disks):
6360
      if idx not in self.disks:
6361
        continue
6362

    
6363
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6364

    
6365
      self.cfg.SetDiskID(dev, node_name)
6366

    
6367
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6368
      names = _GenerateUniqueNames(self.lu, lv_names)
6369

    
6370
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6371
                             logical_id=(vgname, names[0]))
6372
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6373
                             logical_id=(vgname, names[1]))
6374

    
6375
      new_lvs = [lv_data, lv_meta]
6376
      old_lvs = dev.children
6377
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
6378

    
6379
      # we pass force_create=True to force the LVM creation
6380
      for new_lv in new_lvs:
6381
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6382
                        _GetInstanceInfoText(self.instance), False)
6383

    
6384
    return iv_names
6385

    
6386
  def _CheckDevices(self, node_name, iv_names):
6387
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
6388
      self.cfg.SetDiskID(dev, node_name)
6389

    
6390
      result = self.rpc.call_blockdev_find(node_name, dev)
6391

    
6392
      msg = result.fail_msg
6393
      if msg or not result.payload:
6394
        if not msg:
6395
          msg = "disk not found"
6396
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
6397
                                 (name, msg))
6398

    
6399
      if result.payload.is_degraded:
6400
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6401

    
6402
  def _RemoveOldStorage(self, node_name, iv_names):
6403
    for name, (dev, old_lvs, _) in iv_names.iteritems():
6404
      self.lu.LogInfo("Remove logical volumes for %s" % name)
6405

    
6406
      for lv in old_lvs:
6407
        self.cfg.SetDiskID(lv, node_name)
6408

    
6409
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6410
        if msg:
6411
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
6412
                             hint="remove unused LVs manually")
6413

    
6414
  def _ExecDrbd8DiskOnly(self):
6415
    """Replace a disk on the primary or secondary for DRBD 8.
6416

6417
    The algorithm for replace is quite complicated:
6418

6419
      1. for each disk to be replaced:
6420

6421
        1. create new LVs on the target node with unique names
6422
        1. detach old LVs from the drbd device
6423
        1. rename old LVs to name_replaced.<time_t>
6424
        1. rename new LVs to old LVs
6425
        1. attach the new LVs (with the old names now) to the drbd device
6426

6427
      1. wait for sync across all devices
6428

6429
      1. for each modified disk:
6430

6431
        1. remove old LVs (which have the name name_replaced.<time_t>)
6432

6433
    Failures are not very well handled.
6434

6435
    """
6436
    steps_total = 6
6437

    
6438
    # Step: check device activation
6439
    self.lu.LogStep(1, steps_total, "Check device existence")
6440
    self._CheckDisksExistence([self.other_node, self.target_node])
6441
    self._CheckVolumeGroup([self.target_node, self.other_node])
6442

    
6443
    # Step: check other node consistency
6444
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6445
    self._CheckDisksConsistency(self.other_node,
6446
                                self.other_node == self.instance.primary_node,
6447
                                False)
6448

    
6449
    # Step: create new storage
6450
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6451
    iv_names = self._CreateNewStorage(self.target_node)
6452

    
6453
    # Step: for each lv, detach+rename*2+attach
6454
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6455
    for dev, old_lvs, new_lvs in iv_names.itervalues():
6456
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6457

    
6458
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
6459
      result.Raise("Can't detach drbd from local storage on node"
6460
                   " %s for device %s" % (self.target_node, dev.iv_name))
6461
      #dev.children = []
6462
      #cfg.Update(instance)
6463

    
6464
      # ok, we created the new LVs, so now we know we have the needed
6465
      # storage; as such, we proceed on the target node to rename
6466
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6467
      # using the assumption that logical_id == physical_id (which in
6468
      # turn is the unique_id on that node)
6469

    
6470
      # FIXME(iustin): use a better name for the replaced LVs
6471
      temp_suffix = int(time.time())
6472
      ren_fn = lambda d, suff: (d.physical_id[0],
6473
                                d.physical_id[1] + "_replaced-%s" % suff)
6474

    
6475
      # Build the rename list based on what LVs exist on the node
6476
      rename_old_to_new = []
6477
      for to_ren in old_lvs:
6478
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6479
        if not result.fail_msg and result.payload:
6480
          # device exists
6481
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6482

    
6483
      self.lu.LogInfo("Renaming the old LVs on the target node")
6484
      result = self.rpc.call_blockdev_rename(self.target_node, rename_old_to_new)
6485
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
6486

    
6487
      # Now we rename the new LVs to the old LVs
6488
      self.lu.LogInfo("Renaming the new LVs on the target node")
6489
      rename_new_to_old = [(new, old.physical_id)
6490
                           for old, new in zip(old_lvs, new_lvs)]
6491
      result = self.rpc.call_blockdev_rename(self.target_node, rename_new_to_old)
6492
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
6493

    
6494
      for old, new in zip(old_lvs, new_lvs):
6495
        new.logical_id = old.logical_id
6496
        self.cfg.SetDiskID(new, self.target_node)
6497

    
6498
      for disk in old_lvs:
6499
        disk.logical_id = ren_fn(disk, temp_suffix)
6500
        self.cfg.SetDiskID(disk, self.target_node)
6501

    
6502
      # Now that the new lvs have the old name, we can add them to the device
6503
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6504
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev, new_lvs)
6505
      msg = result.fail_msg
6506
      if msg:
6507
        for new_lv in new_lvs:
6508
          msg2 = self.rpc.call_blockdev_remove(self.target_node, new_lv).fail_msg
6509
          if msg2:
6510
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6511
                               hint=("cleanup manually the unused logical"
6512
                                     "volumes"))
6513
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6514

    
6515
      dev.children = new_lvs
6516

    
6517
      self.cfg.Update(self.instance)
6518

    
6519
    # Wait for sync
6520
    # This can fail as the old devices are degraded and _WaitForSync
6521
    # does a combined result over all disks, so we don't check its return value
6522
    self.lu.LogStep(5, steps_total, "Sync devices")
6523
    _WaitForSync(self.lu, self.instance, unlock=True)
6524

    
6525
    # Check all devices manually
6526
    self._CheckDevices(self.instance.primary_node, iv_names)
6527

    
6528
    # Step: remove old storage
6529
    self.lu.LogStep(6, steps_total, "Removing old storage")
6530
    self._RemoveOldStorage(self.target_node, iv_names)
6531

    
6532
  def _ExecDrbd8Secondary(self):
6533
    """Replace the secondary node for DRBD 8.
6534

6535
    The algorithm for replace is quite complicated:
6536
      - for all disks of the instance:
6537
        - create new LVs on the new node with same names
6538
        - shutdown the drbd device on the old secondary
6539
        - disconnect the drbd network on the primary
6540
        - create the drbd device on the new secondary
6541
        - network attach the drbd on the primary, using an artifice:
6542
          the drbd code for Attach() will connect to the network if it
6543
          finds a device which is connected to the good local disks but
6544
          not network enabled
6545
      - wait for sync across all devices
6546
      - remove all disks from the old secondary
6547

6548
    Failures are not very well handled.
6549

6550
    """
6551
    steps_total = 6
6552

    
6553
    # Step: check device activation
6554
    self.lu.LogStep(1, steps_total, "Check device existence")
6555
    self._CheckDisksExistence([self.instance.primary_node])
6556
    self._CheckVolumeGroup([self.instance.primary_node])
6557

    
6558
    # Step: check other node consistency
6559
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6560
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
6561

    
6562
    # Step: create new storage
6563
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6564
    for idx, dev in enumerate(self.instance.disks):
6565
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6566
                      (self.new_node, idx))
6567
      # we pass force_create=True to force LVM creation
6568
      for new_lv in dev.children:
6569
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6570
                        _GetInstanceInfoText(self.instance), False)
6571

    
6572
    # Step 4: drbd minors and drbd setup changes
6573
    # after this, we must manually remove the drbd minors on both the
6574
    # error and the success paths
6575
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6576
    minors = self.cfg.AllocateDRBDMinor([self.new_node for dev in self.instance.disks],
6577
                                        self.instance.name)
6578
    logging.debug("Allocated minors %r" % (minors,))
6579

    
6580
    iv_names = {}
6581
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6582
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
6583
      # create new devices on new_node; note that we create two IDs:
6584
      # one without port, so the drbd will be activated without
6585
      # networking information on the new node at this stage, and one
6586
      # with network, for the latter activation in step 4
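      # Illustrative shape of the two IDs built below (values are made up);
      # a DRBD8 logical_id is (nodeA, nodeB, port, minorA, minorB, secret):
      #
      #   new_alone_id = ("node1.example.com", "node3.example.com", None,
      #                   0, 5, "secret")     # no port -> standalone drbd
      #   new_net_id   = ("node1.example.com", "node3.example.com", 11000,
      #                   0, 5, "secret")     # with port -> networked drbd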
6587
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
6588
      if self.instance.primary_node == o_node1:
6589
        p_minor = o_minor1
6590
      else:
6591
        p_minor = o_minor2
6592

    
6593
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
6594
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
6595

    
6596
      iv_names[idx] = (dev, dev.children, new_net_id)
6597
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6598
                    new_net_id)
6599
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6600
                              logical_id=new_alone_id,
6601
                              children=dev.children,
6602
                              size=dev.size)
6603
      try:
6604
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6605
                              _GetInstanceInfoText(self.instance), False)
6606
      except errors.GenericError:
6607
        self.cfg.ReleaseDRBDMinors(self.instance.name)
6608
        raise
6609

    
6610
    # We have new devices, shutdown the drbd on the old secondary
6611
    for idx, dev in enumerate(self.instance.disks):
6612
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
6613
      self.cfg.SetDiskID(dev, self.target_node)
6614
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
6615
      if msg:
6616
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
6617
                           "node: %s" % (idx, msg),
6618
                           hint=("Please cleanup this device manually as"
6619
                                 " soon as possible"))
6620

    
6621
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6622
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node], self.node_secondary_ip,
6623
                                               self.instance.disks)[self.instance.primary_node]
6624

    
6625
    msg = result.fail_msg
6626
    if msg:
6627
      # detaches didn't succeed (unlikely)
6628
      self.cfg.ReleaseDRBDMinors(self.instance.name)
6629
      raise errors.OpExecError("Can't detach the disks from the network on"
6630
                               " old node: %s" % (msg,))
6631

    
6632
    # if we managed to detach at least one, we update all the disks of
6633
    # the instance to point to the new secondary
6634
    self.lu.LogInfo("Updating instance configuration")
6635
    for dev, _, new_logical_id in iv_names.itervalues():
6636
      dev.logical_id = new_logical_id
6637
      self.cfg.SetDiskID(dev, self.instance.primary_node)
6638

    
6639
    self.cfg.Update(self.instance)
6640

    
6641
    # and now perform the drbd attach
6642
    self.lu.LogInfo("Attaching primary drbds to new secondary"
6643
                    " (standalone => connected)")
6644
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node, self.new_node], self.node_secondary_ip,
6645
                                           self.instance.disks, self.instance.name,
6646
                                           False)
6647
    for to_node, to_result in result.items():
6648
      msg = to_result.fail_msg
6649
      if msg:
6650
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
6651
                           hint=("please do a gnt-instance info to see the"
6652
                                 " status of disks"))
6653

    
6654
    # Wait for sync
6655
    # This can fail as the old devices are degraded and _WaitForSync
6656
    # does a combined result over all disks, so we don't check its return value
6657
    self.lu.LogStep(5, steps_total, "Sync devices")
6658
    _WaitForSync(self.lu, self.instance, unlock=True)
6659

    
6660
    # Check all devices manually
6661
    self._CheckDevices(self.instance.primary_node, iv_names)
6662

    
6663
    # Step: remove old storage
6664
    self.lu.LogStep(6, steps_total, "Removing old storage")
6665
    self._RemoveOldStorage(self.target_node, iv_names)
6666

    
6667

    
6668
class LURepairNodeStorage(NoHooksLU):
6669
  """Repairs the volume group on a node.
6670

6671
  """
6672
  _OP_REQP = ["node_name"]
6673
  REQ_BGL = False
6674

    
6675
  def CheckArguments(self):
6676
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
6677
    if node_name is None:
6678
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
6679

    
6680
    self.op.node_name = node_name
6681

    
6682
  def ExpandNames(self):
6683
    self.needed_locks = {
6684
      locking.LEVEL_NODE: [self.op.node_name],
6685
      }
6686

    
6687
  def _CheckFaultyDisks(self, instance, node_name):
6688
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
6689
                                node_name, True):
6690
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
6691
                                 " node '%s'" % (inst.name, node_name))
6692

    
6693
  def CheckPrereq(self):
6694
    """Check prerequisites.
6695

6696
    """
6697
    storage_type = self.op.storage_type
6698

    
6699
    if (constants.SO_FIX_CONSISTENCY not in
6700
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
6701
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
6702
                                 " repaired" % storage_type)
6703

    
6704
    # Check whether any instance on this node has faulty disks
6705
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
6706
      check_nodes = set(inst.all_nodes)
6707
      check_nodes.discard(self.op.node_name)
6708
      for inst_node_name in check_nodes:
6709
        self._CheckFaultyDisks(inst, inst_node_name)
6710

    
6711
  def Exec(self, feedback_fn):
6712
    feedback_fn("Repairing storage unit '%s' on %s ..." %
6713
                (self.op.name, self.op.node_name))
6714

    
6715
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
6716
    result = self.rpc.call_storage_execute(self.op.node_name,
6717
                                           self.op.storage_type, st_args,
6718
                                           self.op.name,
6719
                                           constants.SO_FIX_CONSISTENCY)
6720
    result.Raise("Failed to repair storage unit '%s' on %s" %
6721
                 (self.op.name, self.op.node_name))
6722

    
6723

    
6724
class LUGrowDisk(LogicalUnit):
6725
  """Grow a disk of an instance.
6726

6727
  """
6728
  HPATH = "disk-grow"
6729
  HTYPE = constants.HTYPE_INSTANCE
6730
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
6731
  REQ_BGL = False
6732

    
6733
  def ExpandNames(self):
6734
    self._ExpandAndLockInstance()
6735
    self.needed_locks[locking.LEVEL_NODE] = []
6736
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6737

    
6738
  def DeclareLocks(self, level):
6739
    if level == locking.LEVEL_NODE:
6740
      self._LockInstancesNodes()
6741

    
6742
  def BuildHooksEnv(self):
6743
    """Build hooks env.
6744

6745
    This runs on the master, the primary and all the secondaries.
6746

6747
    """
6748
    env = {
6749
      "DISK": self.op.disk,
6750
      "AMOUNT": self.op.amount,
6751
      }
6752
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6753
    nl = [
6754
      self.cfg.GetMasterNode(),
6755
      self.instance.primary_node,
6756
      ]
6757
    return env, nl, nl
6758

    
6759
  def CheckPrereq(self):
6760
    """Check prerequisites.
6761

6762
    This checks that the instance is in the cluster.
6763

6764
    """
6765
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6766
    assert instance is not None, \
6767
      "Cannot retrieve locked instance %s" % self.op.instance_name
6768
    nodenames = list(instance.all_nodes)
6769
    for node in nodenames:
6770
      _CheckNodeOnline(self, node)
6771

    
6772

    
6773
    self.instance = instance
6774

    
6775
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
6776
      raise errors.OpPrereqError("Instance's disk layout does not support"
6777
                                 " growing.")
6778

    
6779
    self.disk = instance.FindDisk(self.op.disk)
6780

    
6781
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6782
                                       instance.hypervisor)
6783
    for node in nodenames:
6784
      info = nodeinfo[node]
6785
      info.Raise("Cannot get current information from node %s" % node)
6786
      vg_free = info.payload.get('vg_free', None)
6787
      if not isinstance(vg_free, int):
6788
        raise errors.OpPrereqError("Can't compute free disk space on"
6789
                                   " node %s" % node)
6790
      if self.op.amount > vg_free:
6791
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
6792
                                   " %d MiB available, %d MiB required" %
6793
                                   (node, vg_free, self.op.amount))
6794

    
6795
  def Exec(self, feedback_fn):
6796
    """Execute disk grow.
6797

6798
    """
6799
    instance = self.instance
6800
    disk = self.disk
6801
    for node in instance.all_nodes:
6802
      self.cfg.SetDiskID(disk, node)
6803
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
6804
      result.Raise("Grow request failed to node %s" % node)
6805
    disk.RecordGrow(self.op.amount)
6806
    self.cfg.Update(instance)
6807
    if self.op.wait_for_sync:
6808
      disk_abort = not _WaitForSync(self, instance)
6809
      if disk_abort:
6810
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
6811
                             " status.\nPlease check the instance.")
6812

    
6813

    
6814
class LUQueryInstanceData(NoHooksLU):
6815
  """Query runtime instance data.
6816

6817
  """
6818
  _OP_REQP = ["instances", "static"]
6819
  REQ_BGL = False
6820

    
6821
  def ExpandNames(self):
6822
    self.needed_locks = {}
6823
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6824

    
6825
    if not isinstance(self.op.instances, list):
6826
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6827

    
6828
    if self.op.instances:
6829
      self.wanted_names = []
6830
      for name in self.op.instances:
6831
        full_name = self.cfg.ExpandInstanceName(name)
6832
        if full_name is None:
6833
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6834
        self.wanted_names.append(full_name)
6835
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6836
    else:
6837
      self.wanted_names = None
6838
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6839

    
6840
    self.needed_locks[locking.LEVEL_NODE] = []
6841
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6842

    
6843
  def DeclareLocks(self, level):
6844
    if level == locking.LEVEL_NODE:
6845
      self._LockInstancesNodes()
6846

    
6847
  def CheckPrereq(self):
6848
    """Check prerequisites.
6849

6850
    This only checks the optional instance list against the existing names.
6851

6852
    """
6853
    if self.wanted_names is None:
6854
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6855

    
6856
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6857
                             in self.wanted_names]
6858
    return
6859

    
6860
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
6861
    """Returns the status of a block device
6862

6863
    """
6864
    if self.op.static or not node:
6865
      return None
6866

    
6867
    self.cfg.SetDiskID(dev, node)
6868

    
6869
    result = self.rpc.call_blockdev_find(node, dev)
6870
    if result.offline:
6871
      return None
6872

    
6873
    result.Raise("Can't compute disk status for %s" % instance_name)
6874

    
6875
    status = result.payload
6876
    if status is None:
6877
      return None
6878

    
6879
    return (status.dev_path, status.major, status.minor,
6880
            status.sync_percent, status.estimated_time,
6881
            status.is_degraded, status.ldisk_status)
6882

    
6883
  def _ComputeDiskStatus(self, instance, snode, dev):
6884
    """Compute block device status.
6885

6886
    """
6887
    if dev.dev_type in constants.LDS_DRBD:
6888
      # we change the snode then (otherwise we use the one passed in)
6889
      if dev.logical_id[0] == instance.primary_node:
6890
        snode = dev.logical_id[1]
6891
      else:
6892
        snode = dev.logical_id[0]
6893

    
6894
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
6895
                                              instance.name, dev)
6896
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
6897

    
6898
    if dev.children:
6899
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
6900
                      for child in dev.children]
6901
    else:
6902
      dev_children = []
6903

    
6904
    data = {
6905
      "iv_name": dev.iv_name,
6906
      "dev_type": dev.dev_type,
6907
      "logical_id": dev.logical_id,
6908
      "physical_id": dev.physical_id,
6909
      "pstatus": dev_pstatus,
6910
      "sstatus": dev_sstatus,
6911
      "children": dev_children,
6912
      "mode": dev.mode,
6913
      "size": dev.size,
6914
      }
6915

    
6916
    return data
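    # Shape of the structure returned above (values are illustrative);
    # pstatus/sstatus are the tuples built by _ComputeBlockdevStatus, or
    # None in static mode or when no node is available:
    #
    #   {
    #     "iv_name": "disk/0",
    #     "dev_type": constants.LD_DRBD8,
    #     "logical_id": (...),
    #     "physical_id": (...),
    #     "pstatus": (dev_path, major, minor, sync_percent, estimated_time,
    #                 is_degraded, ldisk_status),
    #     "sstatus": (...),
    #     "children": [<same structure for each child device>],
    #     "mode": constants.DISK_RDWR,
    #     "size": 10240,
    #   }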
6917

    
6918
  def Exec(self, feedback_fn):
6919
    """Gather and return data"""
6920
    result = {}
6921

    
6922
    cluster = self.cfg.GetClusterInfo()
6923

    
6924
    for instance in self.wanted_instances:
6925
      if not self.op.static:
6926
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6927
                                                  instance.name,
6928
                                                  instance.hypervisor)
6929
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6930
        remote_info = remote_info.payload
6931
        if remote_info and "state" in remote_info:
6932
          remote_state = "up"
6933
        else:
6934
          remote_state = "down"
6935
      else:
6936
        remote_state = None
6937
      if instance.admin_up:
6938
        config_state = "up"
6939
      else:
6940
        config_state = "down"
6941

    
6942
      disks = [self._ComputeDiskStatus(instance, None, device)
6943
               for device in instance.disks]
6944

    
6945
      idict = {
6946
        "name": instance.name,
6947
        "config_state": config_state,
6948
        "run_state": remote_state,
6949
        "pnode": instance.primary_node,
6950
        "snodes": instance.secondary_nodes,
6951
        "os": instance.os,
6952
        # this happens to be the same format used for hooks
6953
        "nics": _NICListToTuple(self, instance.nics),
6954
        "disks": disks,
6955
        "hypervisor": instance.hypervisor,
6956
        "network_port": instance.network_port,
6957
        "hv_instance": instance.hvparams,
6958
        "hv_actual": cluster.FillHV(instance),
6959
        "be_instance": instance.beparams,
6960
        "be_actual": cluster.FillBE(instance),
6961
        "serial_no": instance.serial_no,
6962
        "mtime": instance.mtime,
6963
        "ctime": instance.ctime,
6964
        }
6965

    
6966
      result[instance.name] = idict
6967

    
6968
    return result
6969

    
6970

    
6971
class LUSetInstanceParams(LogicalUnit):
6972
  """Modifies an instances's parameters.
6973

  """
  HPATH = "instance-modify"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def CheckArguments(self):
    if not hasattr(self.op, 'nics'):
      self.op.nics = []
    if not hasattr(self.op, 'disks'):
      self.op.disks = []
    if not hasattr(self.op, 'beparams'):
      self.op.beparams = {}
    if not hasattr(self.op, 'hvparams'):
      self.op.hvparams = {}
    self.op.force = getattr(self.op, "force", False)
    if not (self.op.nics or self.op.disks or
            self.op.hvparams or self.op.beparams):
      raise errors.OpPrereqError("No changes submitted")

    # Disk validation
    disk_addremove = 0
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        disk_addremove += 1
        continue
      elif disk_op == constants.DDM_ADD:
        disk_addremove += 1
      else:
        if not isinstance(disk_op, int):
          raise errors.OpPrereqError("Invalid disk index")
        if not isinstance(disk_dict, dict):
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
          raise errors.OpPrereqError(msg)

      if disk_op == constants.DDM_ADD:
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
        if mode not in constants.DISK_ACCESS_SET:
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
        size = disk_dict.get('size', None)
        if size is None:
          raise errors.OpPrereqError("Required disk parameter size missing")
        try:
          size = int(size)
        except ValueError, err:
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
                                     str(err))
        disk_dict['size'] = size
      else:
        # modification of disk
        if 'size' in disk_dict:
          raise errors.OpPrereqError("Disk size change not possible, use"
                                     " grow-disk")

    if disk_addremove > 1:
      raise errors.OpPrereqError("Only one disk add or remove operation"
                                 " supported at a time")

    # NIC validation
    nic_addremove = 0
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        nic_addremove += 1
        continue
      elif nic_op == constants.DDM_ADD:
        nic_addremove += 1
      else:
        if not isinstance(nic_op, int):
          raise errors.OpPrereqError("Invalid nic index")
        if not isinstance(nic_dict, dict):
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
          raise errors.OpPrereqError(msg)

      # nic_dict should be a dict
      nic_ip = nic_dict.get('ip', None)
      if nic_ip is not None:
        if nic_ip.lower() == constants.VALUE_NONE:
          nic_dict['ip'] = None
        else:
          if not utils.IsValidIP(nic_ip):
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)

      nic_bridge = nic_dict.get('bridge', None)
      nic_link = nic_dict.get('link', None)
      if nic_bridge and nic_link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
        nic_dict['bridge'] = None
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
        nic_dict['link'] = None

      if nic_op == constants.DDM_ADD:
        nic_mac = nic_dict.get('mac', None)
        if nic_mac is None:
          nic_dict['mac'] = constants.VALUE_AUTO

      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          if not utils.IsValidMac(nic_mac):
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
                                     " modifying an existing nic")

    if nic_addremove > 1:
      raise errors.OpPrereqError("Only one NIC add or remove operation"
                                 " supported at a time")

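  # Illustrative sketch of the (op, params) pairs validated above -- the
  # values are invented, not taken from a real opcode:
  #
  #   op.disks = [(constants.DDM_ADD, {"size": 1024,
  #                                    "mode": constants.DISK_RDWR})]
  #   op.nics = [(0, {"ip": constants.VALUE_NONE}),
  #              (constants.DDM_REMOVE, {})]
  #
  # Each entry pairs an integer index (modify), DDM_ADD or DDM_REMOVE with a
  # dict of the parameters to change.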
  def ExpandNames(self):
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, primary and secondaries.

    """
    args = dict()
    if constants.BE_MEMORY in self.be_new:
      args['memory'] = self.be_new[constants.BE_MEMORY]
    if constants.BE_VCPUS in self.be_new:
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
    # information at all.
    if self.op.nics:
      args['nics'] = []
      nic_override = dict(self.op.nics)
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
      for idx, nic in enumerate(self.instance.nics):
        if idx in nic_override:
          this_nic_override = nic_override[idx]
        else:
          this_nic_override = {}
        if 'ip' in this_nic_override:
          ip = this_nic_override['ip']
        else:
          ip = nic.ip
        if 'mac' in this_nic_override:
          mac = this_nic_override['mac']
        else:
          mac = nic.mac
        if idx in self.nic_pnew:
          nicparams = self.nic_pnew[idx]
        else:
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      if constants.DDM_ADD in nic_override:
        ip = nic_override[constants.DDM_ADD].get('ip', None)
        mac = nic_override[constants.DDM_ADD]['mac']
        nicparams = self.nic_pnew[constants.DDM_ADD]
        mode = nicparams[constants.NIC_MODE]
        link = nicparams[constants.NIC_LINK]
        args['nics'].append((ip, mac, mode, link))
      elif constants.DDM_REMOVE in nic_override:
        del args['nics'][-1]

    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, nl, nl

  def _GetUpdatedParams(self, old_params, update_dict,
                        default_values, parameter_types):
    """Return the new params dict for the given params.

    @type old_params: dict
    @param old_params: old parameters
    @type update_dict: dict
    @param update_dict: dict containing new parameter values,
                        or constants.VALUE_DEFAULT to reset the
                        parameter to its default value
    @type default_values: dict
    @param default_values: default values for the filled parameters
    @type parameter_types: dict
    @param parameter_types: dict mapping target dict keys to types
                            in constants.ENFORCEABLE_TYPES
    @rtype: (dict, dict)
    @return: (new_parameters, filled_parameters)

    """
    params_copy = copy.deepcopy(old_params)
    for key, val in update_dict.iteritems():
      if val == constants.VALUE_DEFAULT:
        try:
          del params_copy[key]
        except KeyError:
          pass
      else:
        params_copy[key] = val
    utils.ForceDictType(params_copy, parameter_types)
    params_filled = objects.FillDict(default_values, params_copy)
    return (params_copy, params_filled)

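  # A minimal sketch of the merge semantics implemented above (the values are
  # illustrative only): setting a key to constants.VALUE_DEFAULT removes it
  # from the per-instance dict, so the filled dict falls back to the default.
  #
  #   old     = {"memory": 512, "vcpus": 2}
  #   update  = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
  #   default = {"memory": 128, "vcpus": 1, "auto_balance": True}
  #   # _GetUpdatedParams(old, update, default, types) returns
  #   #   new    == {"vcpus": 4}
  #   #   filled == {"memory": 128, "vcpus": 4, "auto_balance": True}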
  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the instance list against the existing names.

    """
    self.force = self.op.force

    # checking the new params on the primary/secondary nodes

    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    cluster = self.cluster = self.cfg.GetClusterInfo()
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    pnode = instance.primary_node
    nodelist = list(instance.all_nodes)

    # hvparams processing
    if self.op.hvparams:
      i_hvdict, hv_new = self._GetUpdatedParams(
                             instance.hvparams, self.op.hvparams,
                             cluster.hvparams[instance.hypervisor],
                             constants.HVS_PARAMETER_TYPES)
      # local check
      hypervisor.GetHypervisor(
        instance.hypervisor).CheckParameterSyntax(hv_new)
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
      self.hv_new = hv_new # the new actual values
      self.hv_inst = i_hvdict # the new dict (without defaults)
    else:
      self.hv_new = self.hv_inst = {}

    # beparams processing
    if self.op.beparams:
      i_bedict, be_new = self._GetUpdatedParams(
                             instance.beparams, self.op.beparams,
                             cluster.beparams[constants.PP_DEFAULT],
                             constants.BES_PARAMETER_TYPES)
      self.be_new = be_new # the new actual values
      self.be_inst = i_bedict # the new dict (without defaults)
    else:
      self.be_new = self.be_inst = {}

    self.warn = []

    if constants.BE_MEMORY in self.op.beparams and not self.force:
      mem_check_list = [pnode]
      if be_new[constants.BE_AUTO_BALANCE]:
        # either we changed auto_balance to yes or it was from before
        mem_check_list.extend(instance.secondary_nodes)
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
                                                  instance.hypervisor)
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
                                         instance.hypervisor)
      pninfo = nodeinfo[pnode]
      msg = pninfo.fail_msg
      if msg:
        # Assume the primary node is unreachable and go ahead
        self.warn.append("Can't get info from primary node %s: %s" %
                         (pnode, msg))
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
        self.warn.append("Node data from primary node %s doesn't contain"
                         " free memory information" % pnode)
      elif instance_info.fail_msg:
        self.warn.append("Can't get instance runtime information: %s" %
                         instance_info.fail_msg)
      else:
        if instance_info.payload:
          current_mem = int(instance_info.payload['memory'])
        else:
          # Assume instance not running
          # (there is a slight race condition here, but it's not very probable,
          # and we have no other way to check)
          current_mem = 0
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
                    pninfo.payload['memory_free'])
        if miss_mem > 0:
          raise errors.OpPrereqError("This change will prevent the instance"
                                     " from starting, due to %d MB of memory"
                                     " missing on its primary node" % miss_mem)

      if be_new[constants.BE_AUTO_BALANCE]:
        for node, nres in nodeinfo.items():
          if node not in instance.secondary_nodes:
            continue
          msg = nres.fail_msg
          if msg:
            self.warn.append("Can't get info from secondary node %s: %s" %
                             (node, msg))
          elif not isinstance(nres.payload.get('memory_free', None), int):
            self.warn.append("Secondary node %s didn't return free"
                             " memory information" % node)
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
            self.warn.append("Not enough memory to failover instance to"
                             " secondary node %s" % node)

    # NIC processing
    self.nic_pnew = {}
    self.nic_pinst = {}
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        if not instance.nics:
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
        continue
      if nic_op != constants.DDM_ADD:
        # an existing nic
        if nic_op < 0 or nic_op >= len(instance.nics):
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
                                     " are 0 to %d" %
                                     (nic_op, len(instance.nics)))
        old_nic_params = instance.nics[nic_op].nicparams
        old_nic_ip = instance.nics[nic_op].ip
      else:
        old_nic_params = {}
        old_nic_ip = None

      update_params_dict = dict([(key, nic_dict[key])
                                 for key in constants.NICS_PARAMETERS
                                 if key in nic_dict])

      if 'bridge' in nic_dict:
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']

      new_nic_params, new_filled_nic_params = \
          self._GetUpdatedParams(old_nic_params, update_params_dict,
                                 cluster.nicparams[constants.PP_DEFAULT],
                                 constants.NICS_PARAMETER_TYPES)
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
      self.nic_pinst[nic_op] = new_nic_params
      self.nic_pnew[nic_op] = new_filled_nic_params
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]

      if new_nic_mode == constants.NIC_MODE_BRIDGED:
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
        if msg:
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
          if self.force:
            self.warn.append(msg)
          else:
            raise errors.OpPrereqError(msg)
      if new_nic_mode == constants.NIC_MODE_ROUTED:
        if 'ip' in nic_dict:
          nic_ip = nic_dict['ip']
        else:
          nic_ip = old_nic_ip
        if nic_ip is None:
          raise errors.OpPrereqError('Cannot set the nic ip to None'
                                     ' on a routed nic')
      if 'mac' in nic_dict:
        nic_mac = nic_dict['mac']
        if nic_mac is None:
          raise errors.OpPrereqError('Cannot set the nic mac to None')
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
          # otherwise generate the mac
          nic_dict['mac'] = self.cfg.GenerateMAC()
        else:
          # or validate/reserve the current one
          if self.cfg.IsMacInUse(nic_mac):
            raise errors.OpPrereqError("MAC address %s already in use"
                                       " in cluster" % nic_mac)

    # DISK processing
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Disk operations not supported for"
                                 " diskless instances")
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        if len(instance.disks) == 1:
          raise errors.OpPrereqError("Cannot remove the last disk of"
                                     " an instance")
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
        ins_l = ins_l[pnode]
        msg = ins_l.fail_msg
        if msg:
          raise errors.OpPrereqError("Can't contact node %s: %s" %
                                     (pnode, msg))
        if instance.name in ins_l.payload:
          raise errors.OpPrereqError("Instance is running, can't remove"
                                     " disks.")

      if (disk_op == constants.DDM_ADD and
          len(instance.disks) >= constants.MAX_DISKS):
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
                                   " add more" % constants.MAX_DISKS)
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
        # an existing disk
        if disk_op < 0 or disk_op >= len(instance.disks):
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
                                     " are 0 to %d" %
                                     (disk_op, len(instance.disks)))

    return

  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    cluster = self.cluster
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        device_idx = len(instance.disks)
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template == constants.DT_FILE:
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        instance.disks[disk_op].mode = disk_dict['mode']
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict['mac']
        ip = nic_dict.get('ip', None)
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        for key in 'mac', 'ip':
          if key in nic_dict:
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pnew:
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    self.cfg.Update(instance)

    return result


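# LUSetInstanceParams.Exec above returns a list of (parameter, change) pairs
# for feedback to the caller; an illustrative (made-up) example:
#
#   [("disk/1", "add:size=1024,mode=rw"),
#    ("nic.0/ip", "192.0.2.10"),
#    ("be/memory", 512)]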
class LUQueryExports(NoHooksLU):
  """Query the exports list

  """
  _OP_REQP = ['nodes']
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node.

    """
    rpcresult = self.rpc.call_export_list(self.nodes)
    result = {}
    for node in rpcresult:
      if rpcresult[node].fail_msg:
        result[node] = False
      else:
        result[node] = rpcresult[node].payload

    return result


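# Shape of the dictionary returned by LUQueryExports.Exec above, with made-up
# node and instance names; False marks a node whose export-list RPC failed:
#
#   {"node1.example.com": ["inst1.example.com", "inst2.example.com"],
#    "node2.example.com": False}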
class LUExportInstance(LogicalUnit):
  """Export an instance to an image in the cluster.

  """
  HPATH = "instance-export"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    # FIXME: lock only instance primary and destination node
    #
    # Sad but true, for now we have to lock all nodes, as we don't know where
    # the previous export might be, and in this LU we search for it and
    # remove it from its current node. In the future we could fix this by:
    #  - making a tasklet to search (share-lock all), then create the new one,
    #    then one to remove, after
    #  - removing the removal operation altogether
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Last minute lock declaration."""
    # All nodes are locked anyway, so nothing to do here.

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on the master, primary node and target node.

    """
    env = {
      "EXPORT_NODE": self.op.target_node,
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
          self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance and node names are valid.

    """
    instance_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self.dst_node = self.cfg.GetNodeInfo(
      self.cfg.ExpandNodeName(self.op.target_node))

    if self.dst_node is None:
      # This is wrong node name, not a non-locked node
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
    _CheckNodeOnline(self, self.dst_node.name)
    _CheckNodeNotDrained(self, self.dst_node.name)

    # instance disk type verification
    for disk in self.instance.disks:
      if disk.dev_type == constants.LD_FILE:
        raise errors.OpPrereqError("Export not supported for instances with"
                                   " file-based disks")

  def Exec(self, feedback_fn):
    """Export an instance to an image in the cluster.

    """
    instance = self.instance
    dst_node = self.dst_node
    src_node = instance.primary_node
    if self.op.shutdown:
      # shutdown the instance, but not the disks
      result = self.rpc.call_instance_shutdown(src_node, instance)
      result.Raise("Could not shutdown instance %s on"
                   " node %s" % (instance.name, src_node))

    vgname = self.cfg.GetVGName()

    snap_disks = []

    # set the disks ID correctly since call_instance_start needs the
    # correct drbd minor to create the symlinks
    for disk in instance.disks:
      self.cfg.SetDiskID(disk, src_node)

    # per-disk results
    dresults = []
    try:
      for idx, disk in enumerate(instance.disks):
        # result.payload will be a snapshot of an lvm leaf of the one we passed
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
                          idx, src_node, msg)
          snap_disks.append(False)
        else:
          disk_id = (vgname, result.payload)
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
                                 logical_id=disk_id, physical_id=disk_id,
                                 iv_name=disk.iv_name)
          snap_disks.append(new_dev)

    finally:
      if self.op.shutdown and instance.admin_up:
        result = self.rpc.call_instance_start(src_node, instance, None, None)
        msg = result.fail_msg
        if msg:
          _ShutdownInstanceDisks(self, instance)
          raise errors.OpExecError("Could not start instance: %s" % msg)

    # TODO: check for size

    cluster_name = self.cfg.GetClusterName()
    for idx, dev in enumerate(snap_disks):
      if dev:
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
                                               instance, cluster_name, idx)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Could not export disk/%s from node %s to"
                          " node %s: %s", idx, src_node, dst_node.name, msg)
          dresults.append(False)
        else:
          dresults.append(True)
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
        if msg:
          self.LogWarning("Could not remove snapshot for disk/%d from node"
                          " %s: %s", idx, src_node, msg)
      else:
        dresults.append(False)

    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
    fin_resu = True
    msg = result.fail_msg
    if msg:
      self.LogWarning("Could not finalize export for instance %s"
                      " on node %s: %s", instance.name, dst_node.name, msg)
      fin_resu = False

    nodelist = self.cfg.GetNodeList()
    nodelist.remove(dst_node.name)

    # on one-node clusters nodelist will be empty after the removal
    # if we proceed the backup would be removed because OpQueryExports
    # substitutes an empty list with the full cluster node list.
    iname = instance.name
    if nodelist:
      exportlist = self.rpc.call_export_list(nodelist)
      for node in exportlist:
        if exportlist[node].fail_msg:
          continue
        if iname in exportlist[node].payload:
          msg = self.rpc.call_export_remove(node, iname).fail_msg
          if msg:
            self.LogWarning("Could not remove older export for instance %s"
                            " on node %s: %s", iname, node, msg)
    return fin_resu, dresults


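# LUExportInstance.Exec above returns (fin_resu, dresults): one boolean for
# the finalize step plus one boolean per disk. A (hypothetical) result of
# (True, [True, False]) would mean the export was finalized but the second
# disk failed to be snapshotted or copied.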
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_REQP = ["instance_name"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def CheckPrereq(self):
    """Check prerequisites.
    """
    pass

  def Exec(self, feedback_fn):
    """Remove any export.

    """
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = False
    if not instance_name:
      fqdn_warn = True
      instance_name = self.op.instance_name

    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
    exportlist = self.rpc.call_export_list(locked_nodes)
    found = False
    for node in exportlist:
      msg = exportlist[node].fail_msg
      if msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
        continue
      if instance_name in exportlist[node].payload:
        found = True
        result = self.rpc.call_export_remove(node, instance_name)
        msg = result.fail_msg
        if msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")


class TagsLU(NoHooksLU):
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      name = self.cfg.ExpandNodeName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid node name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_NODE] = name
    elif self.op.kind == constants.TAG_INSTANCE:
      name = self.cfg.ExpandInstanceName(self.op.name)
      if name is None:
        raise errors.OpPrereqError("Invalid instance name (%s)" %
                                   (self.op.name,))
      self.op.name = name
      self.needed_locks[locking.LEVEL_INSTANCE] = name

  def CheckPrereq(self):
    """Check prerequisites.

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind))


class LUGetTags(TagsLU):
  """Returns the tags of a given object.

  """
  _OP_REQP = ["kind", "name"]
  REQ_BGL = False

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    return list(self.target.GetTags())


class LUSearchTags(NoHooksLU):
  """Searches the tags for a given pattern.

  """
  _OP_REQP = ["pattern"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {}

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the pattern passed for validity by compiling it.

    """
    try:
      self.re = re.compile(self.op.pattern)
    except re.error, err:
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
                                 (self.op.pattern, err))

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    cfg = self.cfg
    tgts = [("/cluster", cfg.GetClusterInfo())]
    ilist = cfg.GetAllInstancesInfo().values()
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
    nlist = cfg.GetAllNodesInfo().values()
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
    results = []
    for path, target in tgts:
      for tag in target.GetTags():
        if self.re.search(tag):
          results.append((path, tag))
    return results


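# LUSearchTags.Exec above yields (path, tag) pairs; searching for "web" on a
# hypothetical cluster might return:
#
#   [("/cluster", "webfarm"), ("/instances/inst1.example.com", "webserver")]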
class LUAddTags(TagsLU):
  """Sets a tag on a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the type and length of the tag name and value.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

  def Exec(self, feedback_fn):
    """Sets the tag.

    """
    try:
      for tag in self.op.tags:
        self.target.AddTag(tag)
    except errors.TagError, err:
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUDelTags(TagsLU):
  """Delete a list of tags from a given object.

  """
  _OP_REQP = ["kind", "name", "tags"]
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that we have the given tag.

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)
    del_tags = frozenset(self.op.tags)
    cur_tags = self.target.GetTags()
    if not del_tags <= cur_tags:
      diff_tags = del_tags - cur_tags
      diff_names = ["'%s'" % tag for tag in diff_tags]
      diff_names.sort()
      raise errors.OpPrereqError("Tag(s) %s not found" %
                                 (",".join(diff_names)))

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    for tag in self.op.tags:
      self.target.RemoveTag(tag)
    try:
      self.cfg.Update(self.target)
    except errors.ConfigurationError:
      raise errors.OpRetryError("There has been a modification to the"
                                " config file and the operation has been"
                                " aborted. Please retry.")


class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  _OP_REQP = ["duration", "on_master", "on_nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}
    if self.op.on_nodes:
      # _GetWantedNodes can be used here, but is not always appropriate to use
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
      # more information.
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    """

  def Exec(self, feedback_fn):
    """Do the actual sleep.

    """
    if self.op.on_master:
      if not utils.TestDelay(self.op.duration):
        raise errors.OpExecError("Error during master delay test")
    if self.op.on_nodes:
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
      for node, node_result in result.items():
        node_result.Raise("Failure during rpc call to node %s" % node)


class IAllocator(object):
  """IAllocator framework.

  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the _KEYS class attribute are required)
    - four buffer attributes (in|out_data|text), that represent the
      input (to the external script) in text and data structure format,
      and the output from it, again in two formats
    - the result variables from the script (success, info, nodes) for
      easy usage

  """
  _ALLO_KEYS = [
    "mem_size", "disks", "disk_template",
    "os", "tags", "nics", "vcpus", "hypervisor",
    ]
  _RELO_KEYS = [
    "relocate_from",
    ]

  def __init__(self, cfg, rpc, mode, name, **kwargs):
    self.cfg = cfg
    self.rpc = rpc
    # init buffer variables
    self.in_text = self.out_text = self.in_data = self.out_data = None
    # init all input fields so that pylint is happy
    self.mode = mode
    self.name = name
    self.mem_size = self.disks = self.disk_template = None
    self.os = self.tags = self.nics = self.vcpus = None
    self.hypervisor = None
    self.relocate_from = None
    # computed fields
    self.required_nodes = None
    # init result fields
    self.success = self.info = self.nodes = None
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      keyset = self._ALLO_KEYS
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      keyset = self._RELO_KEYS
    else:
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
                                   " IAllocator" % self.mode)
    for key in kwargs:
      if key not in keyset:
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
                                     " IAllocator" % key)
      setattr(self, key, kwargs[key])
    for key in keyset:
      if key not in kwargs:
        raise errors.ProgrammerError("Missing input parameter '%s' to"
                                     " IAllocator" % key)
    self._BuildInputData()

  def _ComputeClusterData(self):
    """Compute the generic allocator input data.

    This is the data that is independent of the actual operation.

    """
    cfg = self.cfg
    cluster_info = cfg.GetClusterInfo()
    # cluster data
    data = {
      "version": constants.IALLOCATOR_VERSION,
      "cluster_name": cfg.GetClusterName(),
      "cluster_tags": list(cluster_info.GetTags()),
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
      # we don't have job IDs
      }
    iinfo = cfg.GetAllInstancesInfo().values()
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]

    # node data
    node_results = {}
    node_list = cfg.GetNodeList()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      hypervisor_name = self.hypervisor
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor

    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
                                        hypervisor_name)
    node_iinfo = \
      self.rpc.call_all_instances_info(node_list,
                                       cluster_info.enabled_hypervisors)
    for nname, nresult in node_data.items():
      # first fill in static (config-based) values
      ninfo = cfg.GetNodeInfo(nname)
      pnr = {
        "tags": list(ninfo.GetTags()),
        "primary_ip": ninfo.primary_ip,
        "secondary_ip": ninfo.secondary_ip,
        "offline": ninfo.offline,
        "drained": ninfo.drained,
        "master_candidate": ninfo.master_candidate,
        }

      if not (ninfo.offline or ninfo.drained):
        nresult.Raise("Can't get data for node %s" % nname)
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
                                nname)
        remote_info = nresult.payload

        for attr in ['memory_total', 'memory_free', 'memory_dom0',
                     'vg_size', 'vg_free', 'cpu_total']:
          if attr not in remote_info:
            raise errors.OpExecError("Node '%s' didn't return attribute"
                                     " '%s'" % (nname, attr))
          if not isinstance(remote_info[attr], int):
            raise errors.OpExecError("Node '%s' returned invalid value"
                                     " for '%s': %s" %
                                     (nname, attr, remote_info[attr]))
        # compute memory used by primary instances
        i_p_mem = i_p_up_mem = 0
        for iinfo, beinfo in i_list:
          if iinfo.primary_node == nname:
            i_p_mem += beinfo[constants.BE_MEMORY]
            if iinfo.name not in node_iinfo[nname].payload:
              i_used_mem = 0
            else:
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
            remote_info['memory_free'] -= max(0, i_mem_diff)

            if iinfo.admin_up:
              i_p_up_mem += beinfo[constants.BE_MEMORY]

        # compute memory used by instances
        pnr_dyn = {
          "total_memory": remote_info['memory_total'],
          "reserved_memory": remote_info['memory_dom0'],
          "free_memory": remote_info['memory_free'],
          "total_disk": remote_info['vg_size'],
          "free_disk": remote_info['vg_free'],
          "total_cpus": remote_info['cpu_total'],
          "i_pri_memory": i_p_mem,
          "i_pri_up_memory": i_p_up_mem,
          }
        pnr.update(pnr_dyn)

      node_results[nname] = pnr
    data["nodes"] = node_results

    # instance data
    instance_data = {}
    for iinfo, beinfo in i_list:
      nic_data = []
      for nic in iinfo.nics:
        filled_params = objects.FillDict(
            cluster_info.nicparams[constants.PP_DEFAULT],
            nic.nicparams)
        nic_dict = {"mac": nic.mac,
                    "ip": nic.ip,
                    "mode": filled_params[constants.NIC_MODE],
                    "link": filled_params[constants.NIC_LINK],
                   }
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
        nic_data.append(nic_dict)
      pir = {
        "tags": list(iinfo.GetTags()),
        "admin_up": iinfo.admin_up,
        "vcpus": beinfo[constants.BE_VCPUS],
        "memory": beinfo[constants.BE_MEMORY],
        "os": iinfo.os,
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
        "nics": nic_data,
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
        "disk_template": iinfo.disk_template,
        "hypervisor": iinfo.hypervisor,
        }
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
                                                 pir["disks"])
      instance_data[iinfo.name] = pir

    data["instances"] = instance_data

    self.in_data = data

  def _AddNewInstance(self):
    """Add new instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    data = self.in_data

    disk_space = _ComputeDiskSize(self.disk_template, self.disks)

    if self.disk_template in constants.DTS_NET_MIRROR:
      self.required_nodes = 2
    else:
      self.required_nodes = 1
    request = {
      "type": "allocate",
      "name": self.name,
      "disk_template": self.disk_template,
      "tags": self.tags,
      "os": self.os,
      "vcpus": self.vcpus,
      "memory": self.mem_size,
      "disks": self.disks,
      "disk_space_total": disk_space,
      "nics": self.nics,
      "required_nodes": self.required_nodes,
      }
    data["request"] = request

  def _AddRelocateInstance(self):
    """Add relocate instance data to allocator structure.

    This in combination with _ComputeClusterData will create the
    correct structure needed as input for the allocator.

    The checks for the completeness of the opcode must have already been
    done.

    """
    instance = self.cfg.GetInstanceInfo(self.name)
    if instance is None:
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
                                   " IAllocator" % self.name)

    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")

    if len(instance.secondary_nodes) != 1:
      raise errors.OpPrereqError("Instance has not exactly one secondary node")
8182

    
8183
    self.required_nodes = 1
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)

    request = {
      "type": "relocate",
      "name": self.name,
      "disk_space_total": disk_space,
      "required_nodes": self.required_nodes,
      "relocate_from": self.relocate_from,
      }
    self.in_data["request"] = request

  def _BuildInputData(self):
    """Build input data structures.

    """
    self._ComputeClusterData()

    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
      self._AddNewInstance()
    else:
      self._AddRelocateInstance()

    self.in_text = serializer.Dump(self.in_data)

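  # A trimmed, illustrative example of the serialized input built above for an
  # "allocate" request (all values are made up; the real document also carries
  # the "nodes" and "instances" sections from _ComputeClusterData):
  #
  #   {"request": {"type": "allocate", "name": "inst3.example.com",
  #                "disk_template": "drbd", "memory": 512, "vcpus": 1,
  #                "disks": [{"size": 1024, "mode": "rw"}],
  #                "nics": [...], "os": "debootstrap", "tags": [],
  #                "disk_space_total": ..., "required_nodes": 2},
  #    "version": ..., "cluster_name": "cluster.example.com", ...}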
  def Run(self, name, validate=True, call_fn=None):
    """Run an instance allocator and return the results.

    """
    if call_fn is None:
      call_fn = self.rpc.call_iallocator_runner

    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
    result.Raise("Failure while running the iallocator script")

    self.out_text = result.payload
    if validate:
      self._ValidateResult()

  def _ValidateResult(self):
    """Process the allocator results.

    This will process and if successful save the result in
    self.out_data and the other parameters.

    """
    try:
      rdict = serializer.Load(self.out_text)
    except Exception, err:
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))

    if not isinstance(rdict, dict):
      raise errors.OpExecError("Can't parse iallocator results: not a dict")

    for key in "success", "info", "nodes":
      if key not in rdict:
        raise errors.OpExecError("Can't parse iallocator results:"
                                 " missing key '%s'" % key)
      setattr(self, key, rdict[key])

    if not isinstance(rdict["nodes"], list):
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
                               " is not a list")
    self.out_data = rdict


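# The iallocator script is expected to print a document that _ValidateResult
# above accepts; an illustrative (made-up) reply:
#
#   {"success": true, "info": "allocation successful",
#    "nodes": ["node2.example.com", "node4.example.com"]}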
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests

  """
  _OP_REQP = ["direction", "mode", "name"]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["name", "mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr)
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'")
      for row in self.op.nics:
        if (not isinstance(row, dict) or
            "mac" not in row or
            "ip" not in row or
            "bridge" not in row):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'nics' parameter")
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'")
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the"
                                     " 'disks' parameter")
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      if not hasattr(self.op, "name"):
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
      fname = self.cfg.ExpandInstanceName(self.op.name)
      if fname is None:
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
                                   self.op.name)
      self.op.name = fname
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name")
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result