#
#

# Copyright (C) 2006, 2007, 2008 Google Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.


"""Module implementing the master-side code."""

# pylint: disable-msg=W0613,W0201

import os
import os.path
import time
import re
import platform
import logging
import copy

from ganeti import ssh
from ganeti import utils
from ganeti import errors
from ganeti import hypervisor
from ganeti import locking
from ganeti import constants
from ganeti import objects
from ganeti import serializer
from ganeti import ssconf


class LogicalUnit(object):
  """Logical Unit base class.

  Subclasses must follow these rules:
    - implement ExpandNames
    - implement CheckPrereq (except when tasklets are used)
    - implement Exec (except when tasklets are used)
    - implement BuildHooksEnv
    - redefine HPATH and HTYPE
    - optionally redefine their run requirements:
        REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively

  Note that all commands require root permissions.

  @ivar dry_run_result: the value (if any) that will be returned to the caller
      in dry-run mode (signalled by opcode dry_run parameter)

  """
  HPATH = None
  HTYPE = None
  _OP_REQP = []
  REQ_BGL = True

  def __init__(self, processor, op, context, rpc):
    """Constructor for LogicalUnit.

    This needs to be overridden in derived classes in order to check op
    validity.

    """
    self.proc = processor
    self.op = op
    self.cfg = context.cfg
    self.context = context
    self.rpc = rpc
    # Dicts used to declare locking needs to mcpu
    self.needed_locks = None
    self.acquired_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 0)
    self.add_locks = {}
    self.remove_locks = {}
    # Used to force good behavior when calling helper functions
    self.recalculate_locks = {}
    self.__ssh = None
    # logging
    self.LogWarning = processor.LogWarning
    self.LogInfo = processor.LogInfo
    self.LogStep = processor.LogStep
    # support for dry-run
    self.dry_run_result = None

    # Tasklets
    self.tasklets = None

    for attr_name in self._OP_REQP:
      attr_val = getattr(op, attr_name, None)
      if attr_val is None:
        raise errors.OpPrereqError("Required parameter '%s' missing" %
                                   attr_name)

    self.CheckArguments()

  def __GetSSH(self):
    """Returns the SshRunner object

    """
    if not self.__ssh:
      self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
    return self.__ssh

  ssh = property(fget=__GetSSH)

  def CheckArguments(self):
    """Check syntactic validity for the opcode arguments.

    This method is for doing a simple syntactic check and ensuring the
    validity of opcode parameters, without any cluster-related
    checks. While the same can be accomplished in ExpandNames and/or
    CheckPrereq, doing these separately is better because:

      - ExpandNames is left as purely a lock-related function
      - CheckPrereq is run after we have acquired locks (and possibly
        waited for them)

    The function is allowed to change the self.op attribute so that
    later methods can no longer worry about missing parameters.

    """
    pass

  def ExpandNames(self):
    """Expand names for this LU.

    This method is called before starting to execute the opcode, and it should
    update all the parameters of the opcode to their canonical form (e.g. a
    short node name must be fully expanded after this method has successfully
    completed). This way locking, hooks, logging, etc. can work correctly.

    LUs which implement this method must also populate the self.needed_locks
    member, as a dict with lock levels as keys, and a list of needed lock names
    as values. Rules:

      - use an empty dict if you don't need any lock
      - if you don't need any lock at a particular level omit that level
      - don't put anything for the BGL level
      - if you want all locks at a level use locking.ALL_SET as a value

    If you need to share locks (rather than acquire them exclusively) at one
    level you can modify self.share_locks, setting a true value (usually 1) for
    that level. By default locks are not shared.

    This function can also define a list of tasklets, which then will be
    executed in order instead of the usual LU-level CheckPrereq and Exec
    functions, if those are not defined by the LU.

    Examples::

      # Acquire all nodes and one instance
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: ['instance1.example.tld'],
      }
      # Acquire just two nodes
      self.needed_locks = {
        locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
      }
      # Acquire no locks
      self.needed_locks = {} # No, you can't leave it to the default value None

    """
    # The implementation of this method is mandatory only if the new LU is
    # concurrent, so that old LUs don't need to be changed all at the same
    # time.
    if self.REQ_BGL:
      self.needed_locks = {} # Exclusive LUs don't need locks.
    else:
      raise NotImplementedError

  def DeclareLocks(self, level):
    """Declare LU locking needs for a level

    While most LUs can just declare their locking needs at ExpandNames time,
    sometimes there's the need to calculate some locks after having acquired
    the ones before. This function is called just before acquiring locks at a
    particular level, but after acquiring the ones at lower levels, and permits
    such calculations. It can be used to modify self.needed_locks, and by
    default it does nothing.

    This function is only called if you have something already set in
    self.needed_locks for the level.

    @param level: Locking level which is going to be locked
    @type level: member of ganeti.locking.LEVELS

    """

  def CheckPrereq(self):
    """Check prerequisites for this LU.

    This method should check that the prerequisites for the execution
    of this LU are fulfilled. It can do internode communication, but
    it should be idempotent - no cluster or system changes are
    allowed.

    The method should raise errors.OpPrereqError in case something is
    not fulfilled. Its return value is ignored.

    This method should also update all the parameters of the opcode to
    their canonical form if it hasn't been done by ExpandNames before.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Checking prerequisites for tasklet %s/%s",
                      idx + 1, len(self.tasklets))
        tl.CheckPrereq()
    else:
      raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the LU.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in
    code, or expected.

    """
    if self.tasklets is not None:
      for (idx, tl) in enumerate(self.tasklets):
        logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
        tl.Exec(feedback_fn)
    else:
      raise NotImplementedError

  def BuildHooksEnv(self):
    """Build hooks environment for this LU.

    This method should return a three-element tuple consisting of: a dict
    containing the environment that will be used for running the
    specific hook for this LU, a list of node names on which the hook
    should run before the execution, and a list of node names on which
    the hook should run after the execution.

    The keys of the dict must not have 'GANETI_' prefixed as this will
    be handled in the hooks runner. Also note additional keys will be
    added by the hooks runner. If the LU doesn't define any
    environment, an empty dict (and not None) should be returned.

    No nodes should be returned as an empty list (and not None).

    Note that if the HPATH for a LU class is None, this function will
    not be called.

    """
    raise NotImplementedError

  def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
    """Notify the LU about the results of its hooks.

    This method is called every time a hooks phase is executed, and notifies
    the Logical Unit about the hooks' result. The LU can then use it to alter
    its result based on the hooks.  By default the method does nothing and the
    previous result is passed back unchanged but any LU can define it if it
    wants to use the local cluster hook-scripts somehow.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hook_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: the previous Exec result this LU had, or None
        in the PRE phase
    @return: the new Exec result, based on the previous result
        and hook results

    """
    return lu_result

  def _ExpandAndLockInstance(self):
    """Helper function to expand and lock an instance.

    Many LUs that work on an instance take its name in self.op.instance_name
    and need to expand it and then declare the expanded name for locking. This
    function does it, and then updates self.op.instance_name to the expanded
    name. It also initializes needed_locks as a dict, if this hasn't been done
    before.

    """
    if self.needed_locks is None:
      self.needed_locks = {}
    else:
      assert locking.LEVEL_INSTANCE not in self.needed_locks, \
        "_ExpandAndLockInstance called with instance-level locks set"
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    if expanded_name is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                  self.op.instance_name)
    self.needed_locks[locking.LEVEL_INSTANCE] = expanded_name
    self.op.instance_name = expanded_name

  def _LockInstancesNodes(self, primary_only=False):
    """Helper function to declare instances' nodes for locking.

    This function should be called after locking one or more instances to lock
    their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
    with all primary or secondary nodes for instances already locked and
    present in self.needed_locks[locking.LEVEL_INSTANCE].

    It should be called from DeclareLocks, and for safety only works if
    self.recalculate_locks[locking.LEVEL_NODE] is set.

    In the future it may grow parameters to just lock some instance's nodes, or
    to just lock primaries or secondary nodes, if needed.

    It should be called in DeclareLocks in a way similar to::

      if level == locking.LEVEL_NODE:
        self._LockInstancesNodes()

    @type primary_only: boolean
    @param primary_only: only lock primary nodes of locked instances

    """
    assert locking.LEVEL_NODE in self.recalculate_locks, \
      "_LockInstancesNodes helper function called with no nodes to recalculate"

    # TODO: check if we've really been called with the instance locks held

    # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
    # future we might want to have different behaviors depending on the value
    # of self.recalculate_locks[locking.LEVEL_NODE]
    wanted_nodes = []
    for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
      instance = self.context.cfg.GetInstanceInfo(instance_name)
      wanted_nodes.append(instance.primary_node)
      if not primary_only:
        wanted_nodes.extend(instance.secondary_nodes)

    if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
      self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
    elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
      self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)

    del self.recalculate_locks[locking.LEVEL_NODE]


class NoHooksLU(LogicalUnit):
  """Simple LU which runs no hooks.

  This LU is intended as a parent for other LogicalUnits which will
  run no hooks, in order to reduce duplicate code.

  """
  HPATH = None
  HTYPE = None


class Tasklet:
  """Tasklet base class.

  Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
  they can mix legacy code with tasklets. Locking needs to be done in the LU,
  tasklets know nothing about locks.

  Subclasses must follow these rules:
    - Implement CheckPrereq
    - Implement Exec

  """
  def __init__(self, lu):
    self.lu = lu

    # Shortcuts
    self.cfg = lu.cfg
    self.rpc = lu.rpc

  def CheckPrereq(self):
    """Check prerequisites for this tasklet.

    This method should check whether the prerequisites for the execution of
    this tasklet are fulfilled. It can do internode communication, but it
    should be idempotent - no cluster or system changes are allowed.

    The method should raise errors.OpPrereqError in case something is not
    fulfilled. Its return value is ignored.

    This method should also update all parameters to their canonical form if it
    hasn't been done before.

    """
    raise NotImplementedError

  def Exec(self, feedback_fn):
    """Execute the tasklet.

    This method should implement the actual work. It should raise
    errors.OpExecError for failures that are somewhat dealt with in code, or
    expected.

    """
    raise NotImplementedError


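# ---------------------------------------------------------------------------
# Editor's note: the following block is an illustrative sketch, not part of
# the upstream cmdlib.py.  It shows how the LogicalUnit/Tasklet contracts
# documented above fit together for a hypothetical, read-only opcode; the
# opcode's "nodes" attribute and both class names are assumptions made only
# for this example.
# ---------------------------------------------------------------------------
class _ExampleTasklet(Tasklet):
  """Tasklet that merely reports the node it was given (illustration only)."""
  def __init__(self, lu, node_name):
    Tasklet.__init__(self, lu)
    self.node_name = node_name

  def CheckPrereq(self):
    # A real tasklet would validate cluster state here (idempotently).
    pass

  def Exec(self, feedback_fn):
    feedback_fn("Would inspect node %s" % self.node_name)


class _LUExampleNoop(NoHooksLU):
  """Minimal concurrent LU built entirely from tasklets (illustration only)."""
  _OP_REQP = ["nodes"]
  REQ_BGL = False

  def ExpandNames(self):
    # Declare shared node locks and delegate the real work to tasklets, as
    # described in the LogicalUnit.ExpandNames docstring; the base class
    # CheckPrereq/Exec then iterate over self.tasklets.
    self.needed_locks = {locking.LEVEL_NODE: self.op.nodes}
    self.share_locks[locking.LEVEL_NODE] = 1
    self.tasklets = [_ExampleTasklet(self, name) for name in self.op.nodes]
# ---------------------------------------------------------------------------

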
def _GetWantedNodes(lu, nodes):
  """Returns list of checked and expanded node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nodes: list
  @param nodes: list of node names or None for all nodes
  @rtype: list
  @return: the list of nodes, sorted
  @raise errors.OpProgrammerError: if the nodes parameter is wrong type

  """
  if not isinstance(nodes, list):
    raise errors.OpPrereqError("Invalid argument type 'nodes'")

  if not nodes:
    raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
      " non-empty list of nodes whose name is to be expanded.")

  wanted = []
  for name in nodes:
    node = lu.cfg.ExpandNodeName(name)
    if node is None:
      raise errors.OpPrereqError("No such node name '%s'" % name)
    wanted.append(node)

  return utils.NiceSort(wanted)


def _GetWantedInstances(lu, instances):
  """Returns list of checked and expanded instance names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instances: list
  @param instances: list of instance names or None for all instances
  @rtype: list
  @return: the list of instances, sorted
  @raise errors.OpPrereqError: if the instances parameter is wrong type
  @raise errors.OpPrereqError: if any of the passed instances is not found

  """
  if not isinstance(instances, list):
    raise errors.OpPrereqError("Invalid argument type 'instances'")

  if instances:
    wanted = []

    for name in instances:
      instance = lu.cfg.ExpandInstanceName(name)
      if instance is None:
        raise errors.OpPrereqError("No such instance name '%s'" % name)
      wanted.append(instance)

  else:
    wanted = utils.NiceSort(lu.cfg.GetInstanceList())
  return wanted


def _CheckOutputFields(static, dynamic, selected):
  """Checks whether all selected fields are valid.

  @type static: L{utils.FieldSet}
  @param static: static fields set
  @type dynamic: L{utils.FieldSet}
  @param dynamic: dynamic fields set

  """
  f = utils.FieldSet()
  f.Extend(static)
  f.Extend(dynamic)

  delta = f.NonMatching(selected)
  if delta:
    raise errors.OpPrereqError("Unknown output fields selected: %s"
                               % ",".join(delta))


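# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the upstream cmdlib.py.
# Query-style LUs typically call _CheckOutputFields while checking their
# arguments; the field names and helper below are invented for this example.
# ---------------------------------------------------------------------------
#
#   _FIELDS_STATIC = utils.FieldSet("name", "pnode", "snodes")
#   _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram")
#
#   def CheckArguments(self):
#     # Raises errors.OpPrereqError listing any unknown fields
#     _CheckOutputFields(static=self._FIELDS_STATIC,
#                        dynamic=self._FIELDS_DYNAMIC,
#                        selected=self.op.output_fields)
# ---------------------------------------------------------------------------

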
def _CheckBooleanOpField(op, name):
  """Validates boolean opcode parameters.

  This will ensure that an opcode parameter is either a boolean value,
  or None (but that it always exists).

  """
  val = getattr(op, name, None)
  if not (val is None or isinstance(val, bool)):
    raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
                               (name, str(val)))
  setattr(op, name, val)


def _CheckNodeOnline(lu, node):
  """Ensure that a given node is online.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is offline

  """
  if lu.cfg.GetNodeInfo(node).offline:
    raise errors.OpPrereqError("Can't use offline node %s" % node)


def _CheckNodeNotDrained(lu, node):
  """Ensure that a given node is not drained.

  @param lu: the LU on behalf of which we make the check
  @param node: the node to check
  @raise errors.OpPrereqError: if the node is drained

  """
  if lu.cfg.GetNodeInfo(node).drained:
    raise errors.OpPrereqError("Can't use drained node %s" % node)


def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
                          memory, vcpus, nics, disk_template, disks,
                          bep, hvp, hypervisor_name):
  """Builds instance related env variables for hooks

  This builds the hook environment from individual variables.

  @type name: string
  @param name: the name of the instance
  @type primary_node: string
  @param primary_node: the name of the instance's primary node
  @type secondary_nodes: list
  @param secondary_nodes: list of secondary nodes as strings
  @type os_type: string
  @param os_type: the name of the instance's OS
  @type status: boolean
  @param status: the should_run status of the instance
  @type memory: string
  @param memory: the memory size of the instance
  @type vcpus: string
  @param vcpus: the count of VCPUs the instance has
  @type nics: list
  @param nics: list of tuples (ip, mac, mode, link) representing
      the NICs the instance has
  @type disk_template: string
  @param disk_template: the disk template of the instance
  @type disks: list
  @param disks: the list of (size, mode) pairs
  @type bep: dict
  @param bep: the backend parameters for the instance
  @type hvp: dict
  @param hvp: the hypervisor parameters for the instance
  @type hypervisor_name: string
  @param hypervisor_name: the hypervisor for the instance
  @rtype: dict
  @return: the hook environment for this instance

  """
  if status:
    str_status = "up"
  else:
    str_status = "down"
  env = {
    "OP_TARGET": name,
    "INSTANCE_NAME": name,
    "INSTANCE_PRIMARY": primary_node,
    "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
    "INSTANCE_OS_TYPE": os_type,
    "INSTANCE_STATUS": str_status,
    "INSTANCE_MEMORY": memory,
    "INSTANCE_VCPUS": vcpus,
    "INSTANCE_DISK_TEMPLATE": disk_template,
    "INSTANCE_HYPERVISOR": hypervisor_name,
  }

  if nics:
    nic_count = len(nics)
    for idx, (ip, mac, mode, link) in enumerate(nics):
      if ip is None:
        ip = ""
      env["INSTANCE_NIC%d_IP" % idx] = ip
      env["INSTANCE_NIC%d_MAC" % idx] = mac
      env["INSTANCE_NIC%d_MODE" % idx] = mode
      env["INSTANCE_NIC%d_LINK" % idx] = link
      if mode == constants.NIC_MODE_BRIDGED:
        env["INSTANCE_NIC%d_BRIDGE" % idx] = link
  else:
    nic_count = 0

  env["INSTANCE_NIC_COUNT"] = nic_count

  if disks:
    disk_count = len(disks)
    for idx, (size, mode) in enumerate(disks):
      env["INSTANCE_DISK%d_SIZE" % idx] = size
      env["INSTANCE_DISK%d_MODE" % idx] = mode
  else:
    disk_count = 0

  env["INSTANCE_DISK_COUNT"] = disk_count

  for source, kind in [(bep, "BE"), (hvp, "HV")]:
    for key, value in source.items():
      env["INSTANCE_%s_%s" % (kind, key)] = value

  return env


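# ---------------------------------------------------------------------------
# Editor's note: illustrative sketch, not part of the upstream cmdlib.py.  For
# a hypothetical single-NIC, single-disk instance, _BuildInstanceHookEnv above
# would produce (before the hooks runner adds its own prefix) roughly:
#
#   {"OP_TARGET": "inst1.example.com",
#    "INSTANCE_NAME": "inst1.example.com",
#    "INSTANCE_PRIMARY": "node1.example.com",
#    "INSTANCE_SECONDARIES": "node2.example.com",
#    "INSTANCE_OS_TYPE": "debootstrap",
#    "INSTANCE_STATUS": "up",
#    "INSTANCE_MEMORY": 512,
#    "INSTANCE_VCPUS": 1,
#    "INSTANCE_DISK_TEMPLATE": "drbd",
#    "INSTANCE_HYPERVISOR": "xen-pvm",
#    "INSTANCE_NIC_COUNT": 1,
#    "INSTANCE_NIC0_IP": "", "INSTANCE_NIC0_MAC": "aa:00:00:11:22:33",
#    "INSTANCE_NIC0_MODE": "bridged", "INSTANCE_NIC0_LINK": "xen-br0",
#    "INSTANCE_NIC0_BRIDGE": "xen-br0",
#    "INSTANCE_DISK_COUNT": 1,
#    "INSTANCE_DISK0_SIZE": 10240, "INSTANCE_DISK0_MODE": "rw",
#    "INSTANCE_BE_<param>": ..., "INSTANCE_HV_<param>": ...}
#
# The values above are made up; only the key layout follows the code.
# ---------------------------------------------------------------------------

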
def _NICListToTuple(lu, nics):
  """Build a list of nic information tuples.

  This list is suitable to be passed to _BuildInstanceHookEnv or as a return
  value in LUQueryInstanceData.

  @type lu:  L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type nics: list of L{objects.NIC}
  @param nics: list of nics to convert to hooks tuples

  """
  hooks_nics = []
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[constants.PP_DEFAULT]
  for nic in nics:
    ip = nic.ip
    mac = nic.mac
    filled_params = objects.FillDict(c_nicparams, nic.nicparams)
    mode = filled_params[constants.NIC_MODE]
    link = filled_params[constants.NIC_LINK]
    hooks_nics.append((ip, mac, mode, link))
  return hooks_nics


def _BuildInstanceHookEnvByObject(lu, instance, override=None):
  """Builds instance related env variables for hooks from an object.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for which we should build the
      environment
  @type override: dict
  @param override: dictionary with key/values that will override
      our values
  @rtype: dict
  @return: the hook environment dictionary

  """
  cluster = lu.cfg.GetClusterInfo()
  bep = cluster.FillBE(instance)
  hvp = cluster.FillHV(instance)
  args = {
    'name': instance.name,
    'primary_node': instance.primary_node,
    'secondary_nodes': instance.secondary_nodes,
    'os_type': instance.os,
    'status': instance.admin_up,
    'memory': bep[constants.BE_MEMORY],
    'vcpus': bep[constants.BE_VCPUS],
    'nics': _NICListToTuple(lu, instance.nics),
    'disk_template': instance.disk_template,
    'disks': [(disk.size, disk.mode) for disk in instance.disks],
    'bep': bep,
    'hvp': hvp,
    'hypervisor_name': instance.hypervisor,
  }
  if override:
    args.update(override)
  return _BuildInstanceHookEnv(**args)


def _AdjustCandidatePool(lu):
  """Adjust the candidate pool after node operations.

  """
  mod_list = lu.cfg.MaintainCandidatePool()
  if mod_list:
    lu.LogInfo("Promoted nodes to master candidate role: %s",
               ", ".join(node.name for node in mod_list))
    for name in mod_list:
      lu.context.ReaddNode(name)
  mc_now, mc_max = lu.cfg.GetMasterCandidateStats()
  if mc_now > mc_max:
    lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
               (mc_now, mc_max))


def _CheckNicsBridgesExist(lu, target_nics, target_node,
                               profile=constants.PP_DEFAULT):
  """Check that the bridges needed by a list of nics exist.

  """
  c_nicparams = lu.cfg.GetClusterInfo().nicparams[profile]
  paramslist = [objects.FillDict(c_nicparams, nic.nicparams)
                for nic in target_nics]
  brlist = [params[constants.NIC_LINK] for params in paramslist
            if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
  if brlist:
    result = lu.rpc.call_bridges_exist(target_node, brlist)
    result.Raise("Error checking bridges on destination node '%s'" %
                 target_node, prereq=True)


def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  """
  if node is None:
    node = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, node)


def _GetNodeInstancesInner(cfg, fn):
  return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]


def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  """

  return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)


def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name == inst.primary_node)


def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  """
  return _GetNodeInstancesInner(cfg,
                                lambda inst: node_name in inst.secondary_nodes)


def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  """
  # Special case for file storage
  if storage_type == constants.ST_FILE:
    # storage.FileStorage wants a list of storage directories
    return [[cfg.GetFileStorageDir()]]

  return []


def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  faulty = []

  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq)

  for idx, bdev_status in enumerate(result.payload):
    if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
      faulty.append(idx)

  return faulty


class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    mn = self.cfg.GetMasterNode()
    return env, [], [mn]

  def CheckPrereq(self):
    """No prerequisites to check.

    """
    return True

  def Exec(self, feedback_fn):
    """Nothing to do.

    """
    return True


class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = []

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {"OP_TARGET": self.cfg.GetClusterName()}
    return env, [], []

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    if len(nodelist) != 1 or nodelist[0] != master:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1))
    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instancelist))

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      self.LogWarning("Errors occurred running hooks on %s" % master)

    result = self.rpc.call_node_stop_master(master, False)
    result.Raise("Could not disable the master role")
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
    utils.CreateBackup(priv_key)
    utils.CreateBackup(pub_key)
    return master


class LUVerifyCluster(LogicalUnit):
  """Verifies the cluster status.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_REQP = ["skip_checks"]
  REQ_BGL = False

  def ExpandNames(self):
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def _VerifyNode(self, nodeinfo, file_list, local_cksum,
                  node_result, feedback_fn, master_files,
                  drbd_map, vg_name):
    """Run multiple tests against a node.

    Test list:

      - compares ganeti version
      - checks vg existence and size > 20G
      - checks config file checksum
      - checks ssh to other nodes

    @type nodeinfo: L{objects.Node}
    @param nodeinfo: the node to check
    @param file_list: required list of files
    @param local_cksum: dictionary of local files and their checksums
    @param node_result: the results from the node
    @param feedback_fn: function used to accumulate results
    @param master_files: list of files that only masters should have
    @param drbd_map: the used drbd minors for this node, in
        form of minor: (instance, must_exist) which correspond to instances
        and their running status
    @param vg_name: Ganeti Volume Group (result of self.cfg.GetVGName())

    """
    node = nodeinfo.name

    # main result, node_result should be a non-empty dict
    if not node_result or not isinstance(node_result, dict):
      feedback_fn("  - ERROR: unable to verify node %s." % (node,))
      return True

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = node_result.get('version', None)
    if not (remote_version and isinstance(remote_version, (list, tuple)) and
            len(remote_version) == 2):
      feedback_fn("  - ERROR: connection to %s failed" % (node))
      return True

    if local_version != remote_version[0]:
      feedback_fn("  - ERROR: incompatible protocol versions: master %s,"
                  " node %s %s" % (local_version, node, remote_version[0]))
      return True

    # node seems compatible, we can actually try to look into its results

    bad = False

    # full package version
    if constants.RELEASE_VERSION != remote_version[1]:
      feedback_fn("  - WARNING: software version mismatch: master %s,"
                  " node %s %s" %
                  (constants.RELEASE_VERSION, node, remote_version[1]))

    # checks vg existence and size > 20G
    if vg_name is not None:
      vglist = node_result.get(constants.NV_VGLIST, None)
      if not vglist:
        feedback_fn("  - ERROR: unable to check volume groups on node %s." %
                        (node,))
        bad = True
      else:
        vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
                                              constants.MIN_VG_SIZE)
        if vgstatus:
          feedback_fn("  - ERROR: %s on node %s" % (vgstatus, node))
          bad = True

    # checks config file checksum

    remote_cksum = node_result.get(constants.NV_FILELIST, None)
    if not isinstance(remote_cksum, dict):
      bad = True
      feedback_fn("  - ERROR: node hasn't returned file checksum data")
    else:
      for file_name in file_list:
        node_is_mc = nodeinfo.master_candidate
        must_have_file = file_name not in master_files
        if file_name not in remote_cksum:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' missing" % file_name)
        elif remote_cksum[file_name] != local_cksum[file_name]:
          if node_is_mc or must_have_file:
            bad = True
            feedback_fn("  - ERROR: file '%s' has wrong checksum" % file_name)
          else:
            # not candidate and this is not a must-have file
            bad = True
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates (and the file is outdated)" % file_name)
        else:
          # all good, except non-master/non-must have combination
          if not node_is_mc and not must_have_file:
            feedback_fn("  - ERROR: file '%s' should not exist on non master"
                        " candidates" % file_name)

    # checks ssh to any

    if constants.NV_NODELIST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node ssh connectivity data")
    else:
      if node_result[constants.NV_NODELIST]:
        bad = True
        for node in node_result[constants.NV_NODELIST]:
          feedback_fn("  - ERROR: ssh communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODELIST][node]))

    if constants.NV_NODENETTEST not in node_result:
      bad = True
      feedback_fn("  - ERROR: node hasn't returned node tcp connectivity data")
    else:
      if node_result[constants.NV_NODENETTEST]:
        bad = True
        nlist = utils.NiceSort(node_result[constants.NV_NODENETTEST].keys())
        for node in nlist:
          feedback_fn("  - ERROR: tcp communication with node '%s': %s" %
                          (node, node_result[constants.NV_NODENETTEST][node]))

    hyp_result = node_result.get(constants.NV_HYPERVISOR, None)
    if isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        if hv_result is not None:
          feedback_fn("  - ERROR: hypervisor %s verify failure: '%s'" %
                      (hv_name, hv_result))

    # check used drbd list
    if vg_name is not None:
      used_minors = node_result.get(constants.NV_DRBDLIST, [])
      if not isinstance(used_minors, (tuple, list)):
        feedback_fn("  - ERROR: cannot parse drbd status file: %s" %
                    str(used_minors))
      else:
        for minor, (iname, must_exist) in drbd_map.items():
          if minor not in used_minors and must_exist:
            feedback_fn("  - ERROR: drbd minor %d of instance %s is"
                        " not active" % (minor, iname))
            bad = True
        for minor in used_minors:
          if minor not in drbd_map:
            feedback_fn("  - ERROR: unallocated drbd minor %d is in use" %
                        minor)
            bad = True

    return bad

  def _VerifyInstance(self, instance, instanceconfig, node_vol_is,
                      node_instance, feedback_fn, n_offline):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node.

    """
    bad = False

    node_current = instanceconfig.primary_node

    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      if node in n_offline:
        # ignore missing volumes on offline nodes
        continue
      for volume in node_vol_should[node]:
        if node not in node_vol_is or volume not in node_vol_is[node]:
          feedback_fn("  - ERROR: volume %s missing on node %s" %
                          (volume, node))
          bad = True

    if instanceconfig.admin_up:
      if ((node_current not in node_instance or
          not instance in node_instance[node_current]) and
          node_current not in n_offline):
        feedback_fn("  - ERROR: instance %s not running on node %s" %
                        (instance, node_current))
        bad = True

    for node in node_instance:
      if (not node == node_current):
        if instance in node_instance[node]:
          feedback_fn("  - ERROR: instance %s should not run on node %s" %
                          (instance, node))
          bad = True

    return bad

  def _VerifyOrphanVolumes(self, node_vol_should, node_vol_is, feedback_fn):
    """Verify if there are any unknown volumes in the cluster.

    The .os, .swap and backup volumes are ignored. All other volumes are
    reported as unknown.

    """
    bad = False

    for node in node_vol_is:
      for volume in node_vol_is[node]:
        if node not in node_vol_should or volume not in node_vol_should[node]:
          feedback_fn("  - ERROR: volume %s on node %s should not exist" %
                      (volume, node))
          bad = True
    return bad

  def _VerifyOrphanInstances(self, instancelist, node_instance, feedback_fn):
    """Verify the list of running instances.

    This checks what instances are running but unknown to the cluster.

    """
    bad = False
    for node in node_instance:
      for runninginstance in node_instance[node]:
        if runninginstance not in instancelist:
          feedback_fn("  - ERROR: instance %s on node %s should not exist" %
                          (runninginstance, node))
          bad = True
    return bad

  def _VerifyNPlusOneMemory(self, node_info, instance_cfg, feedback_fn):
    """Verify N+1 Memory Resilience.

    Check that if one single node dies we can still start all the instances it
    was primary for.

    """
    bad = False

    for node, nodeinfo in node_info.iteritems():
      # This code checks that every node which is now listed as secondary has
      # enough memory to host all instances it is supposed to should a single
      # other node in the cluster fail.
      # FIXME: not ready for failover to an arbitrary node
      # FIXME: does not support file-backed instances
      # WARNING: we currently take into account down instances as well as up
      # ones, considering that even if they're down someone might want to start
      # them even in the event of a node failure.
      for prinode, instances in nodeinfo['sinst-by-pnode'].iteritems():
        needed_mem = 0
        for instance in instances:
          bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
          if bep[constants.BE_AUTO_BALANCE]:
            needed_mem += bep[constants.BE_MEMORY]
        if nodeinfo['mfree'] < needed_mem:
          feedback_fn("  - ERROR: not enough memory on node %s to accommodate"
                      " failovers should node %s fail" % (node, prinode))
          bad = True
    return bad

  def CheckPrereq(self):
    """Check prerequisites.

    Transform the list of checks we're going to skip into a set and check that
    all its members are valid.

    """
    self.skip_set = frozenset(self.op.skip_checks)
    if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
      raise errors.OpPrereqError("Invalid checks to be skipped specified")

  def BuildHooksEnv(self):
    """Build hooks env.

    Cluster-Verify hooks are only run in the post phase; their failure is
    logged in the verify output and makes the verification fail.

    """
    all_nodes = self.cfg.GetNodeList()
    env = {
      "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
      }
    for node in self.cfg.GetAllNodesInfo().values():
      env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())

    return env, [], all_nodes

  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    """
    bad = False
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      feedback_fn("  - ERROR: %s" % msg)

    vg_name = self.cfg.GetVGName()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = [] # List of offline nodes
    n_drained = [] # List of nodes being drained
    node_volume = {}
    node_instance = {}
    node_info = {}
    instance_cfg = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.append(constants.SSL_CERT_FILE)
    file_names.append(constants.RAPI_CERT_FILE)
    file_names.extend(master_files)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      }
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_DRBDLIST] = None
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())

    cluster = self.cfg.GetClusterInfo()
    master_node = self.cfg.GetMasterNode()
    all_drbd_map = self.cfg.ComputeDRBDMap()

    for node_i in nodeinfo:
      node = node_i.name

      if node_i.offline:
        feedback_fn("* Skipping offline node %s" % (node,))
        n_offline.append(node)
        continue

      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained.append(node)
      else:
        ntype = "regular"
      feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      if msg:
        feedback_fn("  - ERROR: while contacting node %s: %s" % (node, msg))
        bad = True
        continue

      nresult = all_nvinfo[node].payload
      node_drbd = {}
      for minor, instance in all_drbd_map[node].items():
        if instance not in instanceinfo:
          feedback_fn("  - ERROR: ghost instance '%s' in temporary DRBD map" %
                      instance)
          # ghost instance should not be running, but otherwise we
          # don't give double warnings (both ghost instance and
          # unallocated minor in use)
          node_drbd[minor] = (instance, False)
        else:
          instance = instanceinfo[instance]
          node_drbd[minor] = (instance.name, instance.admin_up)
      result = self._VerifyNode(node_i, file_names, local_checksums,
                                nresult, feedback_fn, master_files,
                                node_drbd, vg_name)
      bad = bad or result

      lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
      if vg_name is None:
        node_volume[node] = {}
      elif isinstance(lvdata, basestring):
        feedback_fn("  - ERROR: LVM problem on node %s: %s" %
                    (node, utils.SafeEncode(lvdata)))
        bad = True
        node_volume[node] = {}
      elif not isinstance(lvdata, dict):
        feedback_fn("  - ERROR: connection to %s failed (lvlist)" % (node,))
        bad = True
        continue
      else:
        node_volume[node] = lvdata

      # node_instance
      idata = nresult.get(constants.NV_INSTANCELIST, None)
      if not isinstance(idata, list):
        feedback_fn("  - ERROR: connection to %s failed (instancelist)" %
                    (node,))
        bad = True
        continue

      node_instance[node] = idata

      # node_info
      nodeinfo = nresult.get(constants.NV_HVINFO, None)
      if not isinstance(nodeinfo, dict):
        feedback_fn("  - ERROR: connection to %s failed (hvinfo)" % (node,))
        bad = True
        continue

      try:
        node_info[node] = {
          "mfree": int(nodeinfo['memory_free']),
          "pinst": [],
          "sinst": [],
          # dictionary holding all instances this node is secondary for,
          # grouped by their primary node. Each key is a cluster node, and each
          # value is a list of instances which have the key as primary and the
          # current node as secondary.  this is handy to calculate N+1 memory
          # availability if you can only failover from a primary to its
          # secondary.
          "sinst-by-pnode": {},
        }
        # FIXME: devise a free space model for file based instances as well
        if vg_name is not None:
          if (constants.NV_VGLIST not in nresult or
              vg_name not in nresult[constants.NV_VGLIST]):
            feedback_fn("  - ERROR: node %s didn't return data for the"
                        " volume group '%s' - it is either missing or broken" %
                        (node, vg_name))
            bad = True
            continue
          node_info[node]["dfree"] = int(nresult[constants.NV_VGLIST][vg_name])
      except (ValueError, KeyError):
        feedback_fn("  - ERROR: invalid nodeinfo value returned"
                    " from node %s" % (node,))
        bad = True
        continue

    node_vol_should = {}

    for instance in instancelist:
      feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      result =  self._VerifyInstance(instance, inst_config, node_volume,
                                     node_instance, feedback_fn, n_offline)
      bad = bad or result
      inst_nodes_offline = []

      inst_config.MapLVsByNode(node_vol_should)

      instance_cfg[instance] = inst_config

      pnode = inst_config.primary_node
      if pnode in node_info:
        node_info[pnode]['pinst'].append(instance)
      elif pnode not in n_offline:
        feedback_fn("  - ERROR: instance %s, connection to primary node"
                    " %s failed" % (instance, pnode))
        bad = True

      if pnode in n_offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if len(inst_config.secondary_nodes) == 0:
        i_non_redundant.append(instance)
      elif len(inst_config.secondary_nodes) > 1:
        feedback_fn("  - WARNING: multiple secondaries for instance %s"
                    % instance)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        if snode in node_info:
          node_info[snode]['sinst'].append(instance)
          if pnode not in node_info[snode]['sinst-by-pnode']:
            node_info[snode]['sinst-by-pnode'][pnode] = []
          node_info[snode]['sinst-by-pnode'][pnode].append(instance)
        elif snode not in n_offline:
          feedback_fn("  - ERROR: instance %s, connection to secondary node"
                      " %s failed" % (instance, snode))
          bad = True
        if snode in n_offline:
          inst_nodes_offline.append(snode)

      if inst_nodes_offline:
        # warn that the instance lives on offline nodes, and set bad=True
        feedback_fn("  - ERROR: instance lives on offline node(s) %s" %
                    ", ".join(inst_nodes_offline))
        bad = True

    feedback_fn("* Verifying orphan volumes")
    result = self._VerifyOrphanVolumes(node_vol_should, node_volume,
                                       feedback_fn)
    bad = bad or result

    feedback_fn("* Verifying remaining instances")
    result = self._VerifyOrphanInstances(instancelist, node_instance,
                                         feedback_fn)
    bad = bad or result

    if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
      feedback_fn("* Verifying N+1 Memory redundancy")
      result = self._VerifyNPlusOneMemory(node_info, instance_cfg, feedback_fn)
      bad = bad or result

    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % len(n_offline))

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % len(n_drained))

    return not bad

  def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
    """Analyze the post-hooks' result

    This method analyses the hook result, handles it, and sends some
    nicely-formatted feedback back to the user.

    @param phase: one of L{constants.HOOKS_PHASE_POST} or
        L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
    @param hooks_results: the results of the multi-node hooks rpc call
    @param feedback_fn: function used to send feedback back to the caller
    @param lu_result: previous Exec result
    @return: the new Exec result, based on the previous result
        and hook results

    """
    # We only really run POST phase hooks, and are only interested in
    # their results
    if phase == constants.HOOKS_PHASE_POST:
      # Used to change hooks' output to proper indentation
      indent_re = re.compile('^', re.M)
      feedback_fn("* Hooks Results")
      if not hooks_results:
        feedback_fn("  - ERROR: general communication failure")
        lu_result = 1
      else:
        for node_name in hooks_results:
          show_node_header = True
          res = hooks_results[node_name]
          msg = res.fail_msg
          if msg:
            if res.offline:
              # no need to warn or set fail return value
              continue
            feedback_fn("    Communication failure in hooks execution: %s" %
                        msg)
            lu_result = 1
            continue
          for script, hkr, output in res.payload:
            if hkr == constants.HKR_FAIL:
              # The node header is only shown once, if there are
              # failing hooks on that node
              if show_node_header:
                feedback_fn("  Node %s:" % node_name)
                show_node_header = False
              feedback_fn("    ERROR: Script %s failed, output:" % script)
              output = indent_re.sub('      ', output)
              feedback_fn("%s" % output)
              lu_result = 1

      return lu_result


class LUVerifyDisks(NoHooksLU):
1457
  """Verifies the cluster disks status.
1458

1459
  """
1460
  _OP_REQP = []
1461
  REQ_BGL = False
1462

    
1463
  def ExpandNames(self):
1464
    self.needed_locks = {
1465
      locking.LEVEL_NODE: locking.ALL_SET,
1466
      locking.LEVEL_INSTANCE: locking.ALL_SET,
1467
    }
1468
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1469

    
1470
  def CheckPrereq(self):
1471
    """Check prerequisites.
1472

1473
    This has no prerequisites.
1474

1475
    """
1476
    pass
1477

    
1478
  def Exec(self, feedback_fn):
1479
    """Verify integrity of cluster disks.
1480

1481
    @rtype: tuple of three items
1482
    @return: a tuple of (dict of node-to-node_error, list of instances
1483
        which need activate-disks, dict of instance: (node, volume) for
1484
        missing volumes)
1485

1486
    """
1487
    result = res_nodes, res_instances, res_missing = {}, [], {}
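    # 'result' is the same tuple whose elements are bound to res_nodes,
    # res_instances and res_missing, so filling those containers in place
    # below is all that is needed before returning 'result'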
1488

    
1489
    vg_name = self.cfg.GetVGName()
1490
    nodes = utils.NiceSort(self.cfg.GetNodeList())
1491
    instances = [self.cfg.GetInstanceInfo(name)
1492
                 for name in self.cfg.GetInstanceList()]
1493

    
1494
    nv_dict = {}
1495
    for inst in instances:
1496
      inst_lvs = {}
1497
      if (not inst.admin_up or
1498
          inst.disk_template not in constants.DTS_NET_MIRROR):
1499
        continue
1500
      inst.MapLVsByNode(inst_lvs)
1501
      # transform { iname: {node: [vol,],},} to {(node, vol): iname}
1502
      for node, vol_list in inst_lvs.iteritems():
1503
        for vol in vol_list:
1504
          nv_dict[(node, vol)] = inst
1505

    
1506
    if not nv_dict:
1507
      return result
1508

    
1509
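    # query all nodes at once for the LVs in the cluster's volume group;
    # offline nodes and per-node failures are handled individually below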
    node_lvs = self.rpc.call_lv_list(nodes, vg_name)
1510

    
1511
    for node in nodes:
1512
      # node_volume
1513
      node_res = node_lvs[node]
1514
      if node_res.offline:
1515
        continue
1516
      msg = node_res.fail_msg
1517
      if msg:
1518
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
1519
        res_nodes[node] = msg
1520
        continue
1521

    
1522
      lvs = node_res.payload
1523
      for lv_name, (_, lv_inactive, lv_online) in lvs.items():
1524
        inst = nv_dict.pop((node, lv_name), None)
1525
        if (not lv_online and inst is not None
1526
            and inst.name not in res_instances):
1527
          res_instances.append(inst.name)
1528

    
1529
    # any leftover items in nv_dict are missing LVs, let's arrange the
1530
    # data better
1531
    for key, inst in nv_dict.iteritems():
1532
      if inst.name not in res_missing:
1533
        res_missing[inst.name] = []
1534
      res_missing[inst.name].append(key)
1535

    
1536
    return result
1537

    
1538

    
1539
class LURepairDiskSizes(NoHooksLU):
1540
  """Verifies the cluster disks sizes.
1541

1542
  """
1543
  _OP_REQP = ["instances"]
1544
  REQ_BGL = False
1545

    
1546
  def ExpandNames(self):
1547

    
1548
    if not isinstance(self.op.instances, list):
1549
      raise errors.OpPrereqError("Invalid argument type 'instances'")
1550

    
1551
    if self.op.instances:
1552
      self.wanted_names = []
1553
      for name in self.op.instances:
1554
        full_name = self.cfg.ExpandInstanceName(name)
1555
        if full_name is None:
1556
          raise errors.OpPrereqError("Instance '%s' not known" % name)
1557
        self.wanted_names.append(full_name)
1559
      self.needed_locks = {
1560
        locking.LEVEL_NODE: [],
1561
        locking.LEVEL_INSTANCE: self.wanted_names,
1562
        }
1563
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
1564
    else:
1565
      self.wanted_names = None
1566
      self.needed_locks = {
1567
        locking.LEVEL_NODE: locking.ALL_SET,
1568
        locking.LEVEL_INSTANCE: locking.ALL_SET,
1569
        }
1570
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))
1571

    
1572
  def DeclareLocks(self, level):
1573
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
1574
      self._LockInstancesNodes(primary_only=True)
1575

    
1576
  def CheckPrereq(self):
1577
    """Check prerequisites.
1578

1579
    This only checks the optional instance list against the existing names.
1580

1581
    """
1582
    if self.wanted_names is None:
1583
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
1584

    
1585
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
1586
                             in self.wanted_names]
1587

    
1588
  def Exec(self, feedback_fn):
1589
    """Verify the size of cluster disks.
1590

1591
    """
1592
    # TODO: check child disks too
1593
    # TODO: check differences in size between primary/secondary nodes
1594
    per_node_disks = {}
1595
    for instance in self.wanted_instances:
1596
      pnode = instance.primary_node
1597
      if pnode not in per_node_disks:
1598
        per_node_disks[pnode] = []
1599
      for idx, disk in enumerate(instance.disks):
1600
        per_node_disks[pnode].append((instance, idx, disk))
1601

    
1602
    changed = []
1603
    for node, dskl in per_node_disks.items():
1604
      result = self.rpc.call_blockdev_getsizes(node, [v[2] for v in dskl])
1605
      if result.failed:
1606
        self.LogWarning("Failure in blockdev_getsizes call to node"
1607
                        " %s, ignoring", node)
1608
        continue
1609
      if len(result.data) != len(dskl):
1610
        self.LogWarning("Invalid result from node %s, ignoring node results",
1611
                        node)
1612
        continue
1613
      for ((instance, idx, disk), size) in zip(dskl, result.data):
1614
        if size is None:
1615
          self.LogWarning("Disk %d of instance %s did not return size"
1616
                          " information, ignoring", idx, instance.name)
1617
          continue
1618
        if not isinstance(size, (int, long)):
1619
          self.LogWarning("Disk %d of instance %s did not return valid"
1620
                          " size information, ignoring", idx, instance.name)
1621
          continue
1622
        size = size >> 20
1623
        if size != disk.size:
1624
          self.LogInfo("Disk %d of instance %s has mismatched size,"
1625
                       " correcting: recorded %d, actual %d", idx,
1626
                       instance.name, disk.size, size)
1627
          disk.size = size
1628
          self.cfg.Update(instance)
1629
          changed.append((instance.name, idx, size))
1630
    return changed
1631

    
1632

    
1633
class LURenameCluster(LogicalUnit):
1634
  """Rename the cluster.
1635

1636
  """
1637
  HPATH = "cluster-rename"
1638
  HTYPE = constants.HTYPE_CLUSTER
1639
  _OP_REQP = ["name"]
1640

    
1641
  def BuildHooksEnv(self):
1642
    """Build hooks env.
1643

1644
    """
1645
    env = {
1646
      "OP_TARGET": self.cfg.GetClusterName(),
1647
      "NEW_NAME": self.op.name,
1648
      }
1649
    mn = self.cfg.GetMasterNode()
1650
    return env, [mn], [mn]
1651

    
1652
  def CheckPrereq(self):
1653
    """Verify that the passed name is a valid one.
1654

1655
    """
1656
    hostname = utils.HostInfo(self.op.name)
1657

    
1658
    new_name = hostname.name
1659
    self.ip = new_ip = hostname.ip
1660
    old_name = self.cfg.GetClusterName()
1661
    old_ip = self.cfg.GetMasterIP()
1662
    if new_name == old_name and new_ip == old_ip:
1663
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
1664
                                 " cluster has changed")
1665
    if new_ip != old_ip:
1666
      if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
1667
        raise errors.OpPrereqError("The given cluster IP address (%s) is"
1668
                                   " reachable on the network. Aborting." %
1669
                                   new_ip)
1670

    
1671
    self.op.name = new_name
1672

    
1673
  def Exec(self, feedback_fn):
1674
    """Rename the cluster.
1675

1676
    """
1677
    clustername = self.op.name
1678
    ip = self.ip
1679

    
1680
    # shutdown the master IP
1681
    master = self.cfg.GetMasterNode()
1682
    result = self.rpc.call_node_stop_master(master, False)
1683
    result.Raise("Could not disable the master role")
1684

    
1685
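    # while the master role is stopped, update the cluster name/IP in the
    # configuration and push the regenerated known_hosts file to the other
    # nodes; the finally clause restarts the master role even on failure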
    try:
1686
      cluster = self.cfg.GetClusterInfo()
1687
      cluster.cluster_name = clustername
1688
      cluster.master_ip = ip
1689
      self.cfg.Update(cluster)
1690

    
1691
      # update the known hosts file
1692
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
1693
      node_list = self.cfg.GetNodeList()
1694
      try:
1695
        node_list.remove(master)
1696
      except ValueError:
1697
        pass
1698
      result = self.rpc.call_upload_file(node_list,
1699
                                         constants.SSH_KNOWN_HOSTS_FILE)
1700
      for to_node, to_result in result.iteritems():
1701
        msg = to_result.fail_msg
1702
        if msg:
1703
          msg = ("Copy of file %s to node %s failed: %s" %
1704
                 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
1705
          self.proc.LogWarning(msg)
1706

    
1707
    finally:
1708
      result = self.rpc.call_node_start_master(master, False, False)
1709
      msg = result.fail_msg
1710
      if msg:
1711
        self.LogWarning("Could not re-enable the master role on"
1712
                        " the master, please restart manually: %s", msg)
1713

    
1714

    
1715
def _RecursiveCheckIfLVMBased(disk):
1716
  """Check if the given disk or its children are lvm-based.
1717

1718
  @type disk: L{objects.Disk}
1719
  @param disk: the disk to check
1720
  @rtype: boolean
1721
  @return: boolean indicating whether a LD_LV dev_type was found or not
1722

1723
  """
1724
  if disk.children:
1725
    for chdisk in disk.children:
1726
      if _RecursiveCheckIfLVMBased(chdisk):
1727
        return True
1728
  return disk.dev_type == constants.LD_LV
1729

    
1730

    
1731
class LUSetClusterParams(LogicalUnit):
1732
  """Change the parameters of the cluster.
1733

1734
  """
1735
  HPATH = "cluster-modify"
1736
  HTYPE = constants.HTYPE_CLUSTER
1737
  _OP_REQP = []
1738
  REQ_BGL = False
1739

    
1740
  def CheckArguments(self):
1741
    """Check parameters
1742

1743
    """
1744
    if not hasattr(self.op, "candidate_pool_size"):
1745
      self.op.candidate_pool_size = None
1746
    if self.op.candidate_pool_size is not None:
1747
      try:
1748
        self.op.candidate_pool_size = int(self.op.candidate_pool_size)
1749
      except (ValueError, TypeError), err:
1750
        raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
1751
                                   str(err))
1752
      if self.op.candidate_pool_size < 1:
1753
        raise errors.OpPrereqError("At least one master candidate needed")
1754

    
1755
  def ExpandNames(self):
1756
    # FIXME: in the future maybe other cluster params won't require checking on
1757
    # all nodes to be modified.
1758
    self.needed_locks = {
1759
      locking.LEVEL_NODE: locking.ALL_SET,
1760
    }
1761
    self.share_locks[locking.LEVEL_NODE] = 1
1762

    
1763
  def BuildHooksEnv(self):
1764
    """Build hooks env.
1765

1766
    """
1767
    env = {
1768
      "OP_TARGET": self.cfg.GetClusterName(),
1769
      "NEW_VG_NAME": self.op.vg_name,
1770
      }
1771
    mn = self.cfg.GetMasterNode()
1772
    return env, [mn], [mn]
1773

    
1774
  def CheckPrereq(self):
1775
    """Check prerequisites.
1776

1777
    This checks whether the given params don't conflict and
1778
    if the given volume group is valid.
1779

1780
    """
1781
    if self.op.vg_name is not None and not self.op.vg_name:
1782
      instances = self.cfg.GetAllInstancesInfo().values()
1783
      for inst in instances:
1784
        for disk in inst.disks:
1785
          if _RecursiveCheckIfLVMBased(disk):
1786
            raise errors.OpPrereqError("Cannot disable lvm storage while"
1787
                                       " lvm-based instances exist")
1788

    
1789
    node_list = self.acquired_locks[locking.LEVEL_NODE]
1790

    
1791
    # if vg_name not None, checks given volume group on all nodes
1792
    if self.op.vg_name:
1793
      vglist = self.rpc.call_vg_list(node_list)
1794
      for node in node_list:
1795
        msg = vglist[node].fail_msg
1796
        if msg:
1797
          # ignoring down node
1798
          self.LogWarning("Error while gathering data on node %s"
1799
                          " (ignoring node): %s", node, msg)
1800
          continue
1801
        vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
1802
                                              self.op.vg_name,
1803
                                              constants.MIN_VG_SIZE)
1804
        if vgstatus:
1805
          raise errors.OpPrereqError("Error on node '%s': %s" %
1806
                                     (node, vgstatus))
1807

    
1808
    self.cluster = cluster = self.cfg.GetClusterInfo()
1809
    # validate params changes
1810
    if self.op.beparams:
1811
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
1812
      self.new_beparams = objects.FillDict(
1813
        cluster.beparams[constants.PP_DEFAULT], self.op.beparams)
1814

    
1815
    if self.op.nicparams:
1816
      utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
1817
      self.new_nicparams = objects.FillDict(
1818
        cluster.nicparams[constants.PP_DEFAULT], self.op.nicparams)
1819
      objects.NIC.CheckParameterSyntax(self.new_nicparams)
1820

    
1821
    # hypervisor list/parameters
1822
    self.new_hvparams = objects.FillDict(cluster.hvparams, {})
1823
    if self.op.hvparams:
1824
      if not isinstance(self.op.hvparams, dict):
1825
        raise errors.OpPrereqError("Invalid 'hvparams' parameter on input")
1826
      for hv_name, hv_dict in self.op.hvparams.items():
1827
        if hv_name not in self.new_hvparams:
1828
          self.new_hvparams[hv_name] = hv_dict
1829
        else:
1830
          self.new_hvparams[hv_name].update(hv_dict)
1831

    
1832
    if self.op.enabled_hypervisors is not None:
1833
      self.hv_list = self.op.enabled_hypervisors
1834
      if not self.hv_list:
1835
        raise errors.OpPrereqError("Enabled hypervisors list must contain at"
1836
                                   " least one member")
1837
      invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
1838
      if invalid_hvs:
1839
        raise errors.OpPrereqError("Enabled hypervisors contains invalid"
1840
                                   " entries: %s" %
1841
                                   utils.CommaJoin(invalid_hvs))
1842
    else:
1843
      self.hv_list = cluster.enabled_hypervisors
1844

    
1845
    if self.op.hvparams or self.op.enabled_hypervisors is not None:
1846
      # either the enabled list has changed, or the parameters have, validate
1847
      for hv_name, hv_params in self.new_hvparams.items():
1848
        if ((self.op.hvparams and hv_name in self.op.hvparams) or
1849
            (self.op.enabled_hypervisors and
1850
             hv_name in self.op.enabled_hypervisors)):
1851
          # either this is a new hypervisor, or its parameters have changed
1852
          hv_class = hypervisor.GetHypervisor(hv_name)
1853
          utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1854
          hv_class.CheckParameterSyntax(hv_params)
1855
          _CheckHVParams(self, node_list, hv_name, hv_params)
1856

    
1857
  def Exec(self, feedback_fn):
1858
    """Change the parameters of the cluster.
1859

1860
    """
1861
    if self.op.vg_name is not None:
1862
      new_volume = self.op.vg_name
1863
      if not new_volume:
1864
        new_volume = None
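        # an empty vg_name means LVM storage is being disabled; it is
        # stored as None in the configuration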
1865
      if new_volume != self.cfg.GetVGName():
1866
        self.cfg.SetVGName(new_volume)
1867
      else:
1868
        feedback_fn("Cluster LVM configuration already in desired"
1869
                    " state, not changing")
1870
    if self.op.hvparams:
1871
      self.cluster.hvparams = self.new_hvparams
1872
    if self.op.enabled_hypervisors is not None:
1873
      self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
1874
    if self.op.beparams:
1875
      self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
1876
    if self.op.nicparams:
1877
      self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
1878

    
1879
    if self.op.candidate_pool_size is not None:
1880
      self.cluster.candidate_pool_size = self.op.candidate_pool_size
1881
      # we need to update the pool size here, otherwise the save will fail
1882
      _AdjustCandidatePool(self)
1883

    
1884
    self.cfg.Update(self.cluster)
1885

    
1886

    
1887
def _RedistributeAncillaryFiles(lu, additional_nodes=None):
1888
  """Distribute additional files which are part of the cluster configuration.
1889

1890
  ConfigWriter takes care of distributing the config and ssconf files, but
1891
  there are more files which should be distributed to all nodes. This function
1892
  makes sure those are copied.
1893

1894
  @param lu: calling logical unit
1895
  @param additional_nodes: list of nodes not in the config to distribute to
1896

1897
  """
1898
  # 1. Gather target nodes
1899
  myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
1900
  dist_nodes = lu.cfg.GetNodeList()
1901
  if additional_nodes is not None:
1902
    dist_nodes.extend(additional_nodes)
1903
  if myself.name in dist_nodes:
1904
    dist_nodes.remove(myself.name)
1905
  # 2. Gather files to distribute
1906
  dist_files = set([constants.ETC_HOSTS,
1907
                    constants.SSH_KNOWN_HOSTS_FILE,
1908
                    constants.RAPI_CERT_FILE,
1909
                    constants.RAPI_USERS_FILE,
1910
                    constants.HMAC_CLUSTER_KEY,
1911
                   ])
1912

    
1913
  enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
1914
  for hv_name in enabled_hypervisors:
1915
    hv_class = hypervisor.GetHypervisor(hv_name)
1916
    dist_files.update(hv_class.GetAncillaryFiles())
1917

    
1918
  # 3. Perform the files upload
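  # (files that are not present on the master node, for example an absent
  # RAPI users file, are silently skipped)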
1919
  for fname in dist_files:
1920
    if os.path.exists(fname):
1921
      result = lu.rpc.call_upload_file(dist_nodes, fname)
1922
      for to_node, to_result in result.items():
1923
        msg = to_result.fail_msg
1924
        if msg:
1925
          msg = ("Copy of file %s to node %s failed: %s" %
1926
                 (fname, to_node, msg))
1927
          lu.proc.LogWarning(msg)
1928

    
1929

    
1930
class LURedistributeConfig(NoHooksLU):
1931
  """Force the redistribution of cluster configuration.
1932

1933
  This is a very simple LU.
1934

1935
  """
1936
  _OP_REQP = []
1937
  REQ_BGL = False
1938

    
1939
  def ExpandNames(self):
1940
    self.needed_locks = {
1941
      locking.LEVEL_NODE: locking.ALL_SET,
1942
    }
1943
    self.share_locks[locking.LEVEL_NODE] = 1
1944

    
1945
  def CheckPrereq(self):
1946
    """Check prerequisites.
1947

1948
    """
1949

    
1950
  def Exec(self, feedback_fn):
1951
    """Redistribute the configuration.
1952

1953
    """
1954
    self.cfg.Update(self.cfg.GetClusterInfo())
1955
    _RedistributeAncillaryFiles(self)
1956

    
1957

    
1958
def _WaitForSync(lu, instance, oneshot=False, unlock=False):
1959
  """Sleep and poll for an instance's disk to sync.
1960

1961
  """
1962
  if not instance.disks:
1963
    return True
1964

    
1965
  if not oneshot:
1966
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
1967

    
1968
  node = instance.primary_node
1969

    
1970
  for dev in instance.disks:
1971
    lu.cfg.SetDiskID(dev, node)
1972

    
1973
  retries = 0
1974
  degr_retries = 10 # in seconds, as we sleep 1 second each time
1975
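  # poll the mirror status until no device reports a sync in progress; up
  # to 10 consecutive RPC failures are tolerated (6 seconds apart), and a
  # "done but degraded" answer is re-checked a few times to rule out
  # transient states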
  while True:
1976
    max_time = 0
1977
    done = True
1978
    cumul_degraded = False
1979
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, instance.disks)
1980
    msg = rstats.fail_msg
1981
    if msg:
1982
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
1983
      retries += 1
1984
      if retries >= 10:
1985
        raise errors.RemoteError("Can't contact node %s for mirror data,"
1986
                                 " aborting." % node)
1987
      time.sleep(6)
1988
      continue
1989
    rstats = rstats.payload
1990
    retries = 0
1991
    for i, mstat in enumerate(rstats):
1992
      if mstat is None:
1993
        lu.LogWarning("Can't compute data for node %s/%s",
1994
                           node, instance.disks[i].iv_name)
1995
        continue
1996

    
1997
      cumul_degraded = (cumul_degraded or
1998
                        (mstat.is_degraded and mstat.sync_percent is None))
1999
      if mstat.sync_percent is not None:
2000
        done = False
2001
        if mstat.estimated_time is not None:
2002
          rem_time = "%d estimated seconds remaining" % mstat.estimated_time
2003
          max_time = mstat.estimated_time
2004
        else:
2005
          rem_time = "no time estimate"
2006
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2007
                        (instance.disks[i].iv_name, mstat.sync_percent, rem_time))
2008

    
2009
    # if we're done but degraded, let's do a few small retries, to
2010
    # make sure we see a stable and not transient situation; therefore
2011
    # we force restart of the loop
2012
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
2013
      logging.info("Degraded disks found, %d retries left", degr_retries)
2014
      degr_retries -= 1
2015
      time.sleep(1)
2016
      continue
2017

    
2018
    if done or oneshot:
2019
      break
2020

    
2021
    time.sleep(min(60, max_time))
2022

    
2023
  if done:
2024
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2025
  return not cumul_degraded
2026

    
2027

    
2028
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2029
  """Check that mirrors are not degraded.
2030

2031
  The ldisk parameter, if True, will change the test from the
2032
  is_degraded attribute (which represents overall non-ok status for
2033
  the device(s)) to the ldisk (representing the local storage status).
2034

2035
  """
2036
  lu.cfg.SetDiskID(dev, node)
2037

    
2038
  result = True
2039

    
2040
  if on_primary or dev.AssembleOnSecondary():
2041
    rstats = lu.rpc.call_blockdev_find(node, dev)
2042
    msg = rstats.fail_msg
2043
    if msg:
2044
      lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2045
      result = False
2046
    elif not rstats.payload:
2047
      lu.LogWarning("Can't find disk on node %s", node)
2048
      result = False
2049
    else:
2050
      if ldisk:
2051
        result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2052
      else:
2053
        result = result and not rstats.payload.is_degraded
2054

    
2055
  if dev.children:
2056
    for child in dev.children:
2057
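      # note that the ldisk flag is not propagated to the children, which
      # are therefore always checked with the default is_degraded test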
      result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2058

    
2059
  return result
2060

    
2061

    
2062
class LUDiagnoseOS(NoHooksLU):
2063
  """Logical unit for OS diagnose/query.
2064

2065
  """
2066
  _OP_REQP = ["output_fields", "names"]
2067
  REQ_BGL = False
2068
  _FIELDS_STATIC = utils.FieldSet()
2069
  _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status")
2070

    
2071
  def ExpandNames(self):
2072
    if self.op.names:
2073
      raise errors.OpPrereqError("Selective OS query not supported")
2074

    
2075
    _CheckOutputFields(static=self._FIELDS_STATIC,
2076
                       dynamic=self._FIELDS_DYNAMIC,
2077
                       selected=self.op.output_fields)
2078

    
2079
    # Lock all nodes, in shared mode
2080
    # Temporary removal of locks, should be reverted later
2081
    # TODO: reintroduce locks when they are lighter-weight
2082
    self.needed_locks = {}
2083
    #self.share_locks[locking.LEVEL_NODE] = 1
2084
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2085

    
2086
  def CheckPrereq(self):
2087
    """Check prerequisites.
2088

2089
    """
2090

    
2091
  @staticmethod
2092
  def _DiagnoseByOS(node_list, rlist):
2093
    """Remaps a per-node return list into an a per-os per-node dictionary
2094

2095
    @param node_list: a list with the names of all nodes
2096
    @param rlist: a map with node names as keys and OS objects as values
2097

2098
    @rtype: dict
2099
    @return: a dictionary with osnames as keys and as value another map, with
2100
        nodes as keys and tuples of (path, status, diagnose) as values, eg::
2101

2102
          {"debian-etch": {"node1": [(/usr/lib/..., True, ""),
2103
                                     (/srv/..., False, "invalid api")],
2104
                           "node2": [(/srv/..., True, "")]}
2105
          }
2106

2107
    """
2108
    all_os = {}
2109
    # we build here the list of nodes that didn't fail the RPC (at RPC
2110
    # level), so that nodes with a non-responding node daemon don't
2111
    # make all OSes invalid
2112
    good_nodes = [node_name for node_name in rlist
2113
                  if not rlist[node_name].fail_msg]
2114
    for node_name, nr in rlist.items():
2115
      if nr.fail_msg or not nr.payload:
2116
        continue
2117
      for name, path, status, diagnose in nr.payload:
2118
        if name not in all_os:
2119
          # build a list of nodes for this os containing empty lists
2120
          # for each node in node_list
2121
          all_os[name] = {}
2122
          for nname in good_nodes:
2123
            all_os[name][nname] = []
2124
        all_os[name][node_name].append((path, status, diagnose))
2125
    return all_os
2126

    
2127
  def Exec(self, feedback_fn):
2128
    """Compute the list of OSes.
2129

2130
    """
2131
    valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2132
    node_data = self.rpc.call_os_diagnose(valid_nodes)
2133
    pol = self._DiagnoseByOS(valid_nodes, node_data)
2134
    output = []
2135
    for os_name, os_data in pol.items():
2136
      row = []
2137
      for field in self.op.output_fields:
2138
        if field == "name":
2139
          val = os_name
2140
        elif field == "valid":
2141
          val = utils.all([osl and osl[0][1] for osl in os_data.values()])
2142
        elif field == "node_status":
2143
          # this is just a copy of the dict
2144
          val = {}
2145
          for node_name, nos_list in os_data.items():
2146
            val[node_name] = nos_list
2147
        else:
2148
          raise errors.ParameterError(field)
2149
        row.append(val)
2150
      output.append(row)
2151

    
2152
    return output
2153

    
2154

    
2155
class LURemoveNode(LogicalUnit):
2156
  """Logical unit for removing a node.
2157

2158
  """
2159
  HPATH = "node-remove"
2160
  HTYPE = constants.HTYPE_NODE
2161
  _OP_REQP = ["node_name"]
2162

    
2163
  def BuildHooksEnv(self):
2164
    """Build hooks env.
2165

2166
    This doesn't run on the target node in the pre phase as a failed
2167
    node would then be impossible to remove.
2168

2169
    """
2170
    env = {
2171
      "OP_TARGET": self.op.node_name,
2172
      "NODE_NAME": self.op.node_name,
2173
      }
2174
    all_nodes = self.cfg.GetNodeList()
2175
    if self.op.node_name in all_nodes:
2176
      all_nodes.remove(self.op.node_name)
2177
    return env, all_nodes, all_nodes
2178

    
2179
  def CheckPrereq(self):
2180
    """Check prerequisites.
2181

2182
    This checks:
2183
     - the node exists in the configuration
2184
     - it does not have primary or secondary instances
2185
     - it's not the master
2186

2187
    Any errors are signaled by raising errors.OpPrereqError.
2188

2189
    """
2190
    node = self.cfg.GetNodeInfo(self.cfg.ExpandNodeName(self.op.node_name))
2191
    if node is None:
2192
      raise errors.OpPrereqError("Node '%s' is unknown." % self.op.node_name)
2193

    
2194
    instance_list = self.cfg.GetInstanceList()
2195

    
2196
    masternode = self.cfg.GetMasterNode()
2197
    if node.name == masternode:
2198
      raise errors.OpPrereqError("Node is the master node,"
2199
                                 " you need to failover first.")
2200

    
2201
    for instance_name in instance_list:
2202
      instance = self.cfg.GetInstanceInfo(instance_name)
2203
      if node.name in instance.all_nodes:
2204
        raise errors.OpPrereqError("Instance %s is still running on the node,"
2205
                                   " please remove first." % instance_name)
2206
    self.op.node_name = node.name
2207
    self.node = node
2208

    
2209
  def Exec(self, feedback_fn):
2210
    """Removes the node from the cluster.
2211

2212
    """
2213
    node = self.node
2214
    logging.info("Stopping the node daemon and removing configs from node %s",
2215
                 node.name)
2216

    
2217
    self.context.RemoveNode(node.name)
2218

    
2219
    # Run post hooks on the node before it's removed
2220
    hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2221
    try:
2222
      h_results = hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2223
    except:
2224
      self.LogWarning("Errors occurred running hooks on %s" % node.name)
2225

    
2226
    result = self.rpc.call_node_leave_cluster(node.name)
2227
    msg = result.fail_msg
2228
    if msg:
2229
      self.LogWarning("Errors encountered on the remote node while leaving"
2230
                      " the cluster: %s", msg)
2231

    
2232
    # Promote nodes to master candidate as needed
2233
    _AdjustCandidatePool(self)
2234

    
2235

    
2236
class LUQueryNodes(NoHooksLU):
2237
  """Logical unit for querying nodes.
2238

2239
  """
2240
  _OP_REQP = ["output_fields", "names", "use_locking"]
2241
  REQ_BGL = False
2242
  _FIELDS_DYNAMIC = utils.FieldSet(
2243
    "dtotal", "dfree",
2244
    "mtotal", "mnode", "mfree",
2245
    "bootid",
2246
    "ctotal", "cnodes", "csockets",
2247
    )
2248

    
2249
  _FIELDS_STATIC = utils.FieldSet(
2250
    "name", "pinst_cnt", "sinst_cnt",
2251
    "pinst_list", "sinst_list",
2252
    "pip", "sip", "tags",
2253
    "serial_no", "ctime", "mtime",
2254
    "master_candidate",
2255
    "master",
2256
    "offline",
2257
    "drained",
2258
    "role",
2259
    )
2260

    
2261
  def ExpandNames(self):
2262
    _CheckOutputFields(static=self._FIELDS_STATIC,
2263
                       dynamic=self._FIELDS_DYNAMIC,
2264
                       selected=self.op.output_fields)
2265

    
2266
    self.needed_locks = {}
2267
    self.share_locks[locking.LEVEL_NODE] = 1
2268

    
2269
    if self.op.names:
2270
      self.wanted = _GetWantedNodes(self, self.op.names)
2271
    else:
2272
      self.wanted = locking.ALL_SET
2273

    
2274
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2275
    self.do_locking = self.do_node_query and self.op.use_locking
2276
    if self.do_locking:
2277
      # if we don't request only static fields, we need to lock the nodes
2278
      self.needed_locks[locking.LEVEL_NODE] = self.wanted
2279

    
2280

    
2281
  def CheckPrereq(self):
2282
    """Check prerequisites.
2283

2284
    """
2285
    # The validation of the node list is done in the _GetWantedNodes,
2286
    # if non empty, and if empty, there's no validation to do
2287
    pass
2288

    
2289
  def Exec(self, feedback_fn):
2290
    """Computes the list of nodes and their attributes.
2291

2292
    """
2293
    all_info = self.cfg.GetAllNodesInfo()
2294
    if self.do_locking:
2295
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
2296
    elif self.wanted != locking.ALL_SET:
2297
      nodenames = self.wanted
2298
      missing = set(nodenames).difference(all_info.keys())
2299
      if missing:
2300
        raise errors.OpExecError(
2301
          "Some nodes were removed before retrieving their data: %s" % missing)
2302
    else:
2303
      nodenames = all_info.keys()
2304

    
2305
    nodenames = utils.NiceSort(nodenames)
2306
    nodelist = [all_info[name] for name in nodenames]
2307

    
2308
    # begin data gathering
2309

    
2310
    if self.do_node_query:
2311
      live_data = {}
2312
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
2313
                                          self.cfg.GetHypervisorType())
2314
      for name in nodenames:
2315
        nodeinfo = node_data[name]
2316
        if not nodeinfo.fail_msg and nodeinfo.payload:
2317
          nodeinfo = nodeinfo.payload
2318
          fn = utils.TryConvert
2319
          live_data[name] = {
2320
            "mtotal": fn(int, nodeinfo.get('memory_total', None)),
2321
            "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
2322
            "mfree": fn(int, nodeinfo.get('memory_free', None)),
2323
            "dtotal": fn(int, nodeinfo.get('vg_size', None)),
2324
            "dfree": fn(int, nodeinfo.get('vg_free', None)),
2325
            "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
2326
            "bootid": nodeinfo.get('bootid', None),
2327
            "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
2328
            "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
2329
            }
2330
        else:
2331
          live_data[name] = {}
2332
    else:
2333
      live_data = dict.fromkeys(nodenames, {})
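      # without a live query every node maps to the same shared empty dict,
      # which is safe because it is only ever read (via .get), never modified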
2334

    
2335
    node_to_primary = dict([(name, set()) for name in nodenames])
2336
    node_to_secondary = dict([(name, set()) for name in nodenames])
2337

    
2338
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
2339
                             "sinst_cnt", "sinst_list"))
2340
    if inst_fields & frozenset(self.op.output_fields):
2341
      instancelist = self.cfg.GetInstanceList()
2342

    
2343
      for instance_name in instancelist:
2344
        inst = self.cfg.GetInstanceInfo(instance_name)
2345
        if inst.primary_node in node_to_primary:
2346
          node_to_primary[inst.primary_node].add(inst.name)
2347
        for secnode in inst.secondary_nodes:
2348
          if secnode in node_to_secondary:
2349
            node_to_secondary[secnode].add(inst.name)
2350

    
2351
    master_node = self.cfg.GetMasterNode()
2352

    
2353
    # end data gathering
2354

    
2355
    output = []
2356
    for node in nodelist:
2357
      node_output = []
2358
      for field in self.op.output_fields:
2359
        if field == "name":
2360
          val = node.name
2361
        elif field == "pinst_list":
2362
          val = list(node_to_primary[node.name])
2363
        elif field == "sinst_list":
2364
          val = list(node_to_secondary[node.name])
2365
        elif field == "pinst_cnt":
2366
          val = len(node_to_primary[node.name])
2367
        elif field == "sinst_cnt":
2368
          val = len(node_to_secondary[node.name])
2369
        elif field == "pip":
2370
          val = node.primary_ip
2371
        elif field == "sip":
2372
          val = node.secondary_ip
2373
        elif field == "tags":
2374
          val = list(node.GetTags())
2375
        elif field == "serial_no":
2376
          val = node.serial_no
2377
        elif field == "ctime":
2378
          val = node.ctime
2379
        elif field == "mtime":
2380
          val = node.mtime
2381
        elif field == "master_candidate":
2382
          val = node.master_candidate
2383
        elif field == "master":
2384
          val = node.name == master_node
2385
        elif field == "offline":
2386
          val = node.offline
2387
        elif field == "drained":
2388
          val = node.drained
2389
        elif self._FIELDS_DYNAMIC.Matches(field):
2390
          val = live_data[node.name].get(field, None)
2391
        elif field == "role":
2392
          if node.name == master_node:
2393
            val = "M"
2394
          elif node.master_candidate:
2395
            val = "C"
2396
          elif node.drained:
2397
            val = "D"
2398
          elif node.offline:
2399
            val = "O"
2400
          else:
2401
            val = "R"
2402
        else:
2403
          raise errors.ParameterError(field)
2404
        node_output.append(val)
2405
      output.append(node_output)
2406

    
2407
    return output
2408

    
2409

    
2410
class LUQueryNodeVolumes(NoHooksLU):
2411
  """Logical unit for getting volumes on node(s).
2412

2413
  """
2414
  _OP_REQP = ["nodes", "output_fields"]
2415
  REQ_BGL = False
2416
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
2417
  _FIELDS_STATIC = utils.FieldSet("node")
2418

    
2419
  def ExpandNames(self):
2420
    _CheckOutputFields(static=self._FIELDS_STATIC,
2421
                       dynamic=self._FIELDS_DYNAMIC,
2422
                       selected=self.op.output_fields)
2423

    
2424
    self.needed_locks = {}
2425
    self.share_locks[locking.LEVEL_NODE] = 1
2426
    if not self.op.nodes:
2427
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2428
    else:
2429
      self.needed_locks[locking.LEVEL_NODE] = \
2430
        _GetWantedNodes(self, self.op.nodes)
2431

    
2432
  def CheckPrereq(self):
2433
    """Check prerequisites.
2434

2435
    This checks that the fields required are valid output fields.
2436

2437
    """
2438
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2439

    
2440
  def Exec(self, feedback_fn):
2441
    """Computes the list of nodes and their attributes.
2442

2443
    """
2444
    nodenames = self.nodes
2445
    volumes = self.rpc.call_node_volumes(nodenames)
2446

    
2447
    ilist = [self.cfg.GetInstanceInfo(iname) for iname
2448
             in self.cfg.GetInstanceList()]
2449

    
2450
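    # map each instance to its LVs grouped by node, so that a listed volume
    # can be traced back to the owning instance for the "instance" field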
    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
2451

    
2452
    output = []
2453
    for node in nodenames:
2454
      nresult = volumes[node]
2455
      if nresult.offline:
2456
        continue
2457
      msg = nresult.fail_msg
2458
      if msg:
2459
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
2460
        continue
2461

    
2462
      node_vols = nresult.payload[:]
2463
      node_vols.sort(key=lambda vol: vol['dev'])
2464

    
2465
      for vol in node_vols:
2466
        node_output = []
2467
        for field in self.op.output_fields:
2468
          if field == "node":
2469
            val = node
2470
          elif field == "phys":
2471
            val = vol['dev']
2472
          elif field == "vg":
2473
            val = vol['vg']
2474
          elif field == "name":
2475
            val = vol['name']
2476
          elif field == "size":
2477
            val = int(float(vol['size']))
2478
          elif field == "instance":
2479
            for inst in ilist:
2480
              if node not in lv_by_node[inst]:
2481
                continue
2482
              if vol['name'] in lv_by_node[inst][node]:
2483
                val = inst.name
2484
                break
2485
            else:
2486
              val = '-'
2487
          else:
2488
            raise errors.ParameterError(field)
2489
          node_output.append(str(val))
2490

    
2491
        output.append(node_output)
2492

    
2493
    return output
2494

    
2495

    
2496
class LUQueryNodeStorage(NoHooksLU):
2497
  """Logical unit for getting information on storage units on node(s).
2498

2499
  """
2500
  _OP_REQP = ["nodes", "storage_type", "output_fields"]
2501
  REQ_BGL = False
2502
  _FIELDS_STATIC = utils.FieldSet("node")
2503

    
2504
  def ExpandNames(self):
2505
    storage_type = self.op.storage_type
2506

    
2507
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2508
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2509

    
2510
    dynamic_fields = constants.VALID_STORAGE_FIELDS[storage_type]
2511

    
2512
    _CheckOutputFields(static=self._FIELDS_STATIC,
2513
                       dynamic=utils.FieldSet(*dynamic_fields),
2514
                       selected=self.op.output_fields)
2515

    
2516
    self.needed_locks = {}
2517
    self.share_locks[locking.LEVEL_NODE] = 1
2518

    
2519
    if self.op.nodes:
2520
      self.needed_locks[locking.LEVEL_NODE] = \
2521
        _GetWantedNodes(self, self.op.nodes)
2522
    else:
2523
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2524

    
2525
  def CheckPrereq(self):
2526
    """Check prerequisites.
2527

2528
    This checks that the fields required are valid output fields.
2529

2530
    """
2531
    self.op.name = getattr(self.op, "name", None)
2532

    
2533
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
2534

    
2535
  def Exec(self, feedback_fn):
2536
    """Computes the list of nodes and their attributes.
2537

2538
    """
2539
    # Always get name to sort by
2540
    if constants.SF_NAME in self.op.output_fields:
2541
      fields = self.op.output_fields[:]
2542
    else:
2543
      fields = [constants.SF_NAME] + self.op.output_fields
2544

    
2545
    # Never ask for node as it's only known to the LU
2546
    while "node" in fields:
2547
      fields.remove("node")
2548

    
2549
    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
2550
    name_idx = field_idx[constants.SF_NAME]
2551

    
2552
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2553
    data = self.rpc.call_storage_list(self.nodes,
2554
                                      self.op.storage_type, st_args,
2555
                                      self.op.name, fields)
2556

    
2557
    result = []
2558

    
2559
    for node in utils.NiceSort(self.nodes):
2560
      nresult = data[node]
2561
      if nresult.offline:
2562
        continue
2563

    
2564
      msg = nresult.fail_msg
2565
      if msg:
2566
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
2567
        continue
2568

    
2569
      rows = dict([(row[name_idx], row) for row in nresult.payload])
2570

    
2571
      for name in utils.NiceSort(rows.keys()):
2572
        row = rows[name]
2573

    
2574
        out = []
2575

    
2576
        for field in self.op.output_fields:
2577
          if field == "node":
2578
            val = node
2579
          elif field in field_idx:
2580
            val = row[field_idx[field]]
2581
          else:
2582
            raise errors.ParameterError(field)
2583

    
2584
          out.append(val)
2585

    
2586
        result.append(out)
2587

    
2588
    return result
2589

    
2590

    
2591
class LUModifyNodeStorage(NoHooksLU):
2592
  """Logical unit for modifying a storage volume on a node.
2593

2594
  """
2595
  _OP_REQP = ["node_name", "storage_type", "name", "changes"]
2596
  REQ_BGL = False
2597

    
2598
  def CheckArguments(self):
2599
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2600
    if node_name is None:
2601
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2602

    
2603
    self.op.node_name = node_name
2604

    
2605
    storage_type = self.op.storage_type
2606
    if storage_type not in constants.VALID_STORAGE_FIELDS:
2607
      raise errors.OpPrereqError("Unknown storage type: %s" % storage_type)
2608

    
2609
  def ExpandNames(self):
2610
    self.needed_locks = {
2611
      locking.LEVEL_NODE: self.op.node_name,
2612
      }
2613

    
2614
  def CheckPrereq(self):
2615
    """Check prerequisites.
2616

2617
    """
2618
    storage_type = self.op.storage_type
2619

    
2620
    try:
2621
      modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
2622
    except KeyError:
2623
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
2624
                                 " modified" % storage_type)
2625

    
2626
    diff = set(self.op.changes.keys()) - modifiable
2627
    if diff:
2628
      raise errors.OpPrereqError("The following fields can not be modified for"
2629
                                 " storage units of type '%s': %r" %
2630
                                 (storage_type, list(diff)))
2631

    
2632
  def Exec(self, feedback_fn):
2633
    """Computes the list of nodes and their attributes.
2634

2635
    """
2636
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
2637
    result = self.rpc.call_storage_modify(self.op.node_name,
2638
                                          self.op.storage_type, st_args,
2639
                                          self.op.name, self.op.changes)
2640
    result.Raise("Failed to modify storage unit '%s' on %s" %
2641
                 (self.op.name, self.op.node_name))
2642

    
2643

    
2644
class LUAddNode(LogicalUnit):
2645
  """Logical unit for adding node to the cluster.
2646

2647
  """
2648
  HPATH = "node-add"
2649
  HTYPE = constants.HTYPE_NODE
2650
  _OP_REQP = ["node_name"]
2651

    
2652
  def BuildHooksEnv(self):
2653
    """Build hooks env.
2654

2655
    This will run on all nodes before, and on all nodes + the new node after.
2656

2657
    """
2658
    env = {
2659
      "OP_TARGET": self.op.node_name,
2660
      "NODE_NAME": self.op.node_name,
2661
      "NODE_PIP": self.op.primary_ip,
2662
      "NODE_SIP": self.op.secondary_ip,
2663
      }
2664
    nodes_0 = self.cfg.GetNodeList()
2665
    nodes_1 = nodes_0 + [self.op.node_name, ]
2666
    return env, nodes_0, nodes_1
2667

    
2668
  def CheckPrereq(self):
2669
    """Check prerequisites.
2670

2671
    This checks:
2672
     - the new node is not already in the config
2673
     - it is resolvable
2674
     - its parameters (single/dual homed) matches the cluster
2675

2676
    Any errors are signaled by raising errors.OpPrereqError.
2677

2678
    """
2679
    node_name = self.op.node_name
2680
    cfg = self.cfg
2681

    
2682
    dns_data = utils.HostInfo(node_name)
2683

    
2684
    node = dns_data.name
2685
    primary_ip = self.op.primary_ip = dns_data.ip
2686
    secondary_ip = getattr(self.op, "secondary_ip", None)
2687
    if secondary_ip is None:
2688
      secondary_ip = primary_ip
2689
    if not utils.IsValidIP(secondary_ip):
2690
      raise errors.OpPrereqError("Invalid secondary IP given")
2691
    self.op.secondary_ip = secondary_ip
2692

    
2693
    node_list = cfg.GetNodeList()
2694
    if not self.op.readd and node in node_list:
2695
      raise errors.OpPrereqError("Node %s is already in the configuration" %
2696
                                 node)
2697
    elif self.op.readd and node not in node_list:
2698
      raise errors.OpPrereqError("Node %s is not in the configuration" % node)
2699

    
2700
    for existing_node_name in node_list:
2701
      existing_node = cfg.GetNodeInfo(existing_node_name)
2702

    
2703
      if self.op.readd and node == existing_node_name:
2704
        if (existing_node.primary_ip != primary_ip or
2705
            existing_node.secondary_ip != secondary_ip):
2706
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
2707
                                     " address configuration as before")
2708
        continue
2709

    
2710
      if (existing_node.primary_ip == primary_ip or
2711
          existing_node.secondary_ip == primary_ip or
2712
          existing_node.primary_ip == secondary_ip or
2713
          existing_node.secondary_ip == secondary_ip):
2714
        raise errors.OpPrereqError("New node ip address(es) conflict with"
2715
                                   " existing node %s" % existing_node.name)
2716

    
2717
    # check that the type of the node (single versus dual homed) is the
2718
    # same as for the master
2719
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
2720
    master_singlehomed = myself.secondary_ip == myself.primary_ip
2721
    newbie_singlehomed = secondary_ip == primary_ip
2722
    if master_singlehomed != newbie_singlehomed:
2723
      if master_singlehomed:
2724
        raise errors.OpPrereqError("The master has no private ip but the"
2725
                                   " new node has one")
2726
      else:
2727
        raise errors.OpPrereqError("The master has a private ip but the"
2728
                                   " new node doesn't have one")
2729

    
2730
    # checks reachability
2731
    if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
2732
      raise errors.OpPrereqError("Node not reachable by ping")
2733

    
2734
    if not newbie_singlehomed:
2735
      # check reachability from my secondary ip to newbie's secondary ip
2736
      if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
2737
                           source=myself.secondary_ip):
2738
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
2739
                                   " based ping to noded port")
2740

    
2741
    cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2742
    if self.op.readd:
2743
      exceptions = [node]
2744
    else:
2745
      exceptions = []
2746
    mc_now, mc_max = self.cfg.GetMasterCandidateStats(exceptions)
2747
    # the new node will increase mc_max with one, so:
2748
    mc_max = min(mc_max + 1, cp_size)
2749
    self.master_candidate = mc_now < mc_max
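    # the new node is made a master candidate only if the (enlarged)
    # candidate pool still has room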
2750

    
2751
    if self.op.readd:
2752
      self.new_node = self.cfg.GetNodeInfo(node)
2753
      assert self.new_node is not None, "Can't retrieve locked node %s" % node
2754
    else:
2755
      self.new_node = objects.Node(name=node,
2756
                                   primary_ip=primary_ip,
2757
                                   secondary_ip=secondary_ip,
2758
                                   master_candidate=self.master_candidate,
2759
                                   offline=False, drained=False)
2760

    
2761
  def Exec(self, feedback_fn):
2762
    """Adds the new node to the cluster.
2763

2764
    """
2765
    new_node = self.new_node
2766
    node = new_node.name
2767

    
2768
    # for re-adds, reset the offline/drained/master-candidate flags;
2769
    # we need to reset here, otherwise offline would prevent RPC calls
2770
    # later in the procedure; this also means that if the re-add
2771
    # fails, we are left with a non-offlined, broken node
2772
    if self.op.readd:
2773
      new_node.drained = new_node.offline = False
2774
      self.LogInfo("Readding a node, the offline/drained flags were reset")
2775
      # if we demote the node, we do cleanup later in the procedure
2776
      new_node.master_candidate = self.master_candidate
2777

    
2778
    # notify the user about any possible mc promotion
2779
    if new_node.master_candidate:
2780
      self.LogInfo("Node will be a master candidate")
2781

    
2782
    # check connectivity
2783
    result = self.rpc.call_version([node])[node]
2784
    result.Raise("Can't get version information from node %s" % node)
2785
    if constants.PROTOCOL_VERSION == result.payload:
2786
      logging.info("Communication to node %s fine, sw version %s match",
2787
                   node, result.payload)
2788
    else:
2789
      raise errors.OpExecError("Version mismatch master version %s,"
2790
                               " node version %s" %
2791
                               (constants.PROTOCOL_VERSION, result.payload))
2792

    
2793
    # setup ssh on node
2794
    logging.info("Copy ssh key to node %s", node)
2795
    priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
2796
    keyarray = []
2797
    keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
2798
                constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
2799
                priv_key, pub_key]
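    # the cluster's SSH host keys (DSA and RSA) and the key pair of the
    # user Ganeti runs as are copied verbatim to the new node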
2800

    
2801
    for i in keyfiles:
2802
      f = open(i, 'r')
2803
      try:
2804
        keyarray.append(f.read())
2805
      finally:
2806
        f.close()
2807

    
2808
    result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
2809
                                    keyarray[2],
2810
                                    keyarray[3], keyarray[4], keyarray[5])
2811
    result.Raise("Cannot transfer ssh keys to the new node")
2812

    
2813
    # Add node to our /etc/hosts, and add key to known_hosts
2814
    if self.cfg.GetClusterInfo().modify_etc_hosts:
2815
      utils.AddHostToEtcHosts(new_node.name)
2816

    
2817
    if new_node.secondary_ip != new_node.primary_ip:
2818
      result = self.rpc.call_node_has_ip_address(new_node.name,
2819
                                                 new_node.secondary_ip)
2820
      result.Raise("Failure checking secondary ip on node %s" % new_node.name,
2821
                   prereq=True)
2822
      if not result.payload:
2823
        raise errors.OpExecError("Node claims it doesn't have the secondary ip"
2824
                                 " you gave (%s). Please fix and re-run this"
2825
                                 " command." % new_node.secondary_ip)
2826

    
2827
    node_verify_list = [self.cfg.GetMasterNode()]
2828
    node_verify_param = {
2829
      'nodelist': [node],
2830
      # TODO: do a node-net-test as well?
2831
    }
2832

    
2833
    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
2834
                                       self.cfg.GetClusterName())
2835
    for verifier in node_verify_list:
2836
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
2837
      nl_payload = result[verifier].payload['nodelist']
2838
      if nl_payload:
2839
        for failed in nl_payload:
2840
          feedback_fn("ssh/hostname verification failed %s -> %s" %
2841
                      (verifier, nl_payload[failed]))
2842
        raise errors.OpExecError("ssh/hostname verification failed.")
2843

    
2844
    if self.op.readd:
2845
      _RedistributeAncillaryFiles(self)
2846
      self.context.ReaddNode(new_node)
2847
      # make sure we redistribute the config
2848
      self.cfg.Update(new_node)
2849
      # and make sure the new node will not have old files around
2850
      if not new_node.master_candidate:
2851
        result = self.rpc.call_node_demote_from_mc(new_node.name)
2852
        msg = result.RemoteFailMsg()
2853
        if msg:
2854
          self.LogWarning("Node failed to demote itself from master"
2855
                          " candidate status: %s" % msg)
2856
    else:
2857
      _RedistributeAncillaryFiles(self, additional_nodes=[node])
2858
      self.context.AddNode(new_node)
2859

    
2860

    
2861
class LUSetNodeParams(LogicalUnit):
2862
  """Modifies the parameters of a node.
2863

2864
  """
2865
  HPATH = "node-modify"
2866
  HTYPE = constants.HTYPE_NODE
2867
  _OP_REQP = ["node_name"]
2868
  REQ_BGL = False
2869

    
2870
  def CheckArguments(self):
2871
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
2872
    if node_name is None:
2873
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
2874
    self.op.node_name = node_name
2875
    _CheckBooleanOpField(self.op, 'master_candidate')
2876
    _CheckBooleanOpField(self.op, 'offline')
2877
    _CheckBooleanOpField(self.op, 'drained')
2878
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
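    # None means the flag was not given and stays unchanged; at least one
    # flag must be supplied and at most one of them may be set to True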
2879
    if all_mods.count(None) == 3:
2880
      raise errors.OpPrereqError("Please pass at least one modification")
2881
    if all_mods.count(True) > 1:
2882
      raise errors.OpPrereqError("Can't set the node into more than one"
2883
                                 " state at the same time")
2884

    
2885
  def ExpandNames(self):
2886
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
2887

    
2888
  def BuildHooksEnv(self):
2889
    """Build hooks env.
2890

2891
    This runs on the master node.
2892

2893
    """
2894
    env = {
2895
      "OP_TARGET": self.op.node_name,
2896
      "MASTER_CANDIDATE": str(self.op.master_candidate),
2897
      "OFFLINE": str(self.op.offline),
2898
      "DRAINED": str(self.op.drained),
2899
      }
2900
    nl = [self.cfg.GetMasterNode(),
2901
          self.op.node_name]
2902
    return env, nl, nl
2903

    
2904
  def CheckPrereq(self):
2905
    """Check prerequisites.
2906

2907
    This checks the requested changes against the node's current state
    and the cluster's master candidate requirements.
2908

2909
    """
2910
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
2911

    
2912
    if ((self.op.master_candidate == False or self.op.offline == True or
2913
         self.op.drained == True) and node.master_candidate):
2914
      # we will demote the node from master_candidate
2915
      if self.op.node_name == self.cfg.GetMasterNode():
2916
        raise errors.OpPrereqError("The master node has to be a"
2917
                                   " master candidate, online and not drained")
2918
      cp_size = self.cfg.GetClusterInfo().candidate_pool_size
2919
      num_candidates, _ = self.cfg.GetMasterCandidateStats()
2920
      if num_candidates <= cp_size:
2921
        msg = ("Not enough master candidates (desired"
2922
               " %d, new value will be %d)" % (cp_size, num_candidates-1))
2923
        if self.op.force:
2924
          self.LogWarning(msg)
2925
        else:
2926
          raise errors.OpPrereqError(msg)
2927

    
2928
    if (self.op.master_candidate == True and
2929
        ((node.offline and not self.op.offline == False) or
2930
         (node.drained and not self.op.drained == False))):
2931
      raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
2932
                                 " to master_candidate" % node.name)
2933

    
2934
    return
2935

    
2936
  def Exec(self, feedback_fn):
2937
    """Modifies a node.
2938

2939
    """
2940
    node = self.node
2941

    
2942
    result = []
2943
    changed_mc = False
2944

    
2945
    if self.op.offline is not None:
2946
      node.offline = self.op.offline
2947
      result.append(("offline", str(self.op.offline)))
2948
      if self.op.offline == True:
2949
        if node.master_candidate:
2950
          node.master_candidate = False
2951
          changed_mc = True
2952
          result.append(("master_candidate", "auto-demotion due to offline"))
2953
        if node.drained:
2954
          node.drained = False
2955
          result.append(("drained", "clear drained status due to offline"))
2956

    
2957
    if self.op.master_candidate is not None:
2958
      node.master_candidate = self.op.master_candidate
2959
      changed_mc = True
2960
      result.append(("master_candidate", str(self.op.master_candidate)))
2961
      if self.op.master_candidate == False:
2962
        rrc = self.rpc.call_node_demote_from_mc(node.name)
2963
        msg = rrc.fail_msg
2964
        if msg:
2965
          self.LogWarning("Node failed to demote itself: %s" % msg)
2966

    
2967
    if self.op.drained is not None:
2968
      node.drained = self.op.drained
2969
      result.append(("drained", str(self.op.drained)))
2970
      if self.op.drained == True:
2971
        if node.master_candidate:
2972
          node.master_candidate = False
2973
          changed_mc = True
2974
          result.append(("master_candidate", "auto-demotion due to drain"))
2975
          rrc = self.rpc.call_node_demote_from_mc(node.name)
2976
          msg = rrc.RemoteFailMsg()
2977
          if msg:
2978
            self.LogWarning("Node failed to demote itself: %s" % msg)
2979
        if node.offline:
2980
          node.offline = False
2981
          result.append(("offline", "clear offline status due to drain"))
2982

    
2983
    # this will trigger configuration file update, if needed
2984
    self.cfg.Update(node)
2985
    # this will trigger job queue propagation or cleanup
2986
    if changed_mc:
2987
      self.context.ReaddNode(node)
2988

    
2989
    return result
2990

    
2991

    
2992
class LUPowercycleNode(NoHooksLU):
2993
  """Powercycles a node.
2994

2995
  """
2996
  _OP_REQP = ["node_name", "force"]
2997
  REQ_BGL = False
2998

    
2999
  def CheckArguments(self):
3000
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
3001
    if node_name is None:
3002
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
3003
    self.op.node_name = node_name
3004
    if node_name == self.cfg.GetMasterNode() and not self.op.force:
3005
      raise errors.OpPrereqError("The node is the master and the force"
3006
                                 " parameter was not set")
3007

    
3008
  def ExpandNames(self):
3009
    """Locking for PowercycleNode.
3010

3011
    This is a last-resort option and shouldn't block on other
3012
    jobs. Therefore, we grab no locks.
3013

3014
    """
3015
    self.needed_locks = {}
3016

    
3017
  def CheckPrereq(self):
3018
    """Check prerequisites.
3019

3020
    This LU has no prereqs.
3021

3022
    """
3023
    pass
3024

    
3025
  def Exec(self, feedback_fn):
3026
    """Reboots a node.
3027

3028
    """
3029
    result = self.rpc.call_node_powercycle(self.op.node_name,
3030
                                           self.cfg.GetHypervisorType())
3031
    result.Raise("Failed to schedule the reboot")
3032
    return result.payload
3033

    
3034

    
3035
class LUQueryClusterInfo(NoHooksLU):
3036
  """Query cluster configuration.
3037

3038
  """
3039
  _OP_REQP = []
3040
  REQ_BGL = False
3041

    
3042
  def ExpandNames(self):
3043
    self.needed_locks = {}
3044

    
3045
  def CheckPrereq(self):
3046
    """No prerequsites needed for this LU.
3047

3048
    """
3049
    pass
3050

    
3051
  def Exec(self, feedback_fn):
3052
    """Return cluster config.
3053

3054
    """
3055
    cluster = self.cfg.GetClusterInfo()
3056
    result = {
3057
      "software_version": constants.RELEASE_VERSION,
3058
      "protocol_version": constants.PROTOCOL_VERSION,
3059
      "config_version": constants.CONFIG_VERSION,
3060
      "os_api_version": max(constants.OS_API_VERSIONS),
3061
      "export_version": constants.EXPORT_VERSION,
3062
      "architecture": (platform.architecture()[0], platform.machine()),
3063
      "name": cluster.cluster_name,
3064
      "master": cluster.master_node,
3065
      "default_hypervisor": cluster.enabled_hypervisors[0],
3066
      "enabled_hypervisors": cluster.enabled_hypervisors,
3067
      "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3068
                        for hypervisor_name in cluster.enabled_hypervisors]),
3069
      "beparams": cluster.beparams,
3070
      "nicparams": cluster.nicparams,
3071
      "candidate_pool_size": cluster.candidate_pool_size,
3072
      "master_netdev": cluster.master_netdev,
3073
      "volume_group_name": cluster.volume_group_name,
3074
      "file_storage_dir": cluster.file_storage_dir,
3075
      "ctime": cluster.ctime,
3076
      "mtime": cluster.mtime,
3077
      }
3078

    
3079
    return result
3080

    
3081

    
3082
class LUQueryConfigValues(NoHooksLU):
3083
  """Return configuration values.
3084

3085
  """
3086
  _OP_REQP = []
3087
  REQ_BGL = False
3088
  _FIELDS_DYNAMIC = utils.FieldSet()
3089
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3090
                                  "watcher_pause")
3091

    
3092
  def ExpandNames(self):
3093
    self.needed_locks = {}
3094

    
3095
    _CheckOutputFields(static=self._FIELDS_STATIC,
3096
                       dynamic=self._FIELDS_DYNAMIC,
3097
                       selected=self.op.output_fields)
3098

    
3099
  def CheckPrereq(self):
3100
    """No prerequisites.
3101

3102
    """
3103
    pass
3104

    
3105
  def Exec(self, feedback_fn):
3106
    """Dump a representation of the cluster config to the standard output.
3107

3108
    """
3109
    values = []
3110
    for field in self.op.output_fields:
3111
      if field == "cluster_name":
3112
        entry = self.cfg.GetClusterName()
3113
      elif field == "master_node":
3114
        entry = self.cfg.GetMasterNode()
3115
      elif field == "drain_flag":
3116
        entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3117
      elif field == "watcher_pause":
3118
        return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3119
      else:
3120
        raise errors.ParameterError(field)
3121
      values.append(entry)
3122
    return values
3123

    
3124

    
3125
class LUActivateInstanceDisks(NoHooksLU):
3126
  """Bring up an instance's disks.
3127

3128
  """
3129
  _OP_REQP = ["instance_name"]
3130
  REQ_BGL = False
3131

    
3132
  def ExpandNames(self):
3133
    self._ExpandAndLockInstance()
3134
    self.needed_locks[locking.LEVEL_NODE] = []
3135
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3136

    
3137
  def DeclareLocks(self, level):
3138
    if level == locking.LEVEL_NODE:
3139
      self._LockInstancesNodes()
3140

    
3141
  def CheckPrereq(self):
3142
    """Check prerequisites.
3143

3144
    This checks that the instance is in the cluster.
3145

3146
    """
3147
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3148
    assert self.instance is not None, \
3149
      "Cannot retrieve locked instance %s" % self.op.instance_name
3150
    _CheckNodeOnline(self, self.instance.primary_node)
3151
    if not hasattr(self.op, "ignore_size"):
3152
      self.op.ignore_size = False
3153

    
3154
  def Exec(self, feedback_fn):
3155
    """Activate the disks.
3156

3157
    """
3158
    disks_ok, disks_info = \
3159
              _AssembleInstanceDisks(self, self.instance,
3160
                                     ignore_size=self.op.ignore_size)
3161
    if not disks_ok:
3162
      raise errors.OpExecError("Cannot activate block devices")
3163

    
3164
    return disks_info
3165

    
3166

    
3167
def _AssembleInstanceDisks(lu, instance, ignore_secondaries=False,
3168
                           ignore_size=False):
3169
  """Prepare the block devices for an instance.
3170

3171
  This sets up the block devices on all nodes.
3172

3173
  @type lu: L{LogicalUnit}
3174
  @param lu: the logical unit on whose behalf we execute
3175
  @type instance: L{objects.Instance}
3176
  @param instance: the instance for whose disks we assemble
3177
  @type ignore_secondaries: boolean
3178
  @param ignore_secondaries: if true, errors on secondary nodes
3179
      won't result in an error return from the function
3180
  @type ignore_size: boolean
3181
  @param ignore_size: if true, the current known size of the disk
3182
      will not be used during the disk activation, useful for cases
3183
      when the size is wrong
3184
  @return: False if the operation failed, otherwise a list of
3185
      (host, instance_visible_name, node_visible_name)
3186
      with the mapping from node devices to instance devices
3187

3188
  """
3189
  device_info = []
3190
  disks_ok = True
3191
  iname = instance.name
3192
  # With the two passes mechanism we try to reduce the window of
3193
  # opportunity for the race condition of switching DRBD to primary
3194
  # before handshaking occured, but we do not eliminate it
3195

    
3196
  # The proper fix would be to wait (with some limits) until the
3197
  # connection has been made and drbd transitions from WFConnection
3198
  # into any other network-connected state (Connected, SyncTarget,
3199
  # SyncSource, etc.)
3200

    
3201
  # 1st pass, assemble on all nodes in secondary mode
3202
  for inst_disk in instance.disks:
3203
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3204
      if ignore_size:
3205
        node_disk = node_disk.Copy()
3206
        node_disk.UnsetSize()
3207
      lu.cfg.SetDiskID(node_disk, node)
3208
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3209
      msg = result.fail_msg
3210
      if msg:
3211
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3212
                           " (is_primary=False, pass=1): %s",
3213
                           inst_disk.iv_name, node, msg)
3214
        if not ignore_secondaries:
3215
          disks_ok = False
3216

    
3217
  # FIXME: race condition on drbd migration to primary
3218

    
3219
  # 2nd pass, do only the primary node
3220
  for inst_disk in instance.disks:
3221
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3222
      if node != instance.primary_node:
3223
        continue
3224
      if ignore_size:
3225
        node_disk = node_disk.Copy()
3226
        node_disk.UnsetSize()
3227
      lu.cfg.SetDiskID(node_disk, node)
3228
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3229
      msg = result.fail_msg
3230
      if msg:
3231
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
3232
                           " (is_primary=True, pass=2): %s",
3233
                           inst_disk.iv_name, node, msg)
3234
        disks_ok = False
3235
    device_info.append((instance.primary_node, inst_disk.iv_name,
3236
                        result.payload))
3237

    
3238
  # leave the disks configured for the primary node
3239
  # this is a workaround that would be fixed better by
3240
  # improving the logical/physical id handling
3241
  for disk in instance.disks:
3242
    lu.cfg.SetDiskID(disk, instance.primary_node)
3243

    
3244
  return disks_ok, device_info
3245

    
3246

    
3247
def _StartInstanceDisks(lu, instance, force):
3248
  """Start the disks of an instance.
3249

3250
  """
3251
  disks_ok, _ = _AssembleInstanceDisks(lu, instance,
3252
                                           ignore_secondaries=force)
3253
  if not disks_ok:
3254
    _ShutdownInstanceDisks(lu, instance)
3255
    if force is not None and not force:
3256
      lu.proc.LogWarning("", hint="If the message above refers to a"
3257
                         " secondary node,"
3258
                         " you can retry the operation using '--force'.")
3259
    raise errors.OpExecError("Disk consistency error")
3260

    
3261

    
3262
class LUDeactivateInstanceDisks(NoHooksLU):
3263
  """Shutdown an instance's disks.
3264

3265
  """
3266
  _OP_REQP = ["instance_name"]
3267
  REQ_BGL = False
3268

    
3269
  def ExpandNames(self):
3270
    self._ExpandAndLockInstance()
3271
    self.needed_locks[locking.LEVEL_NODE] = []
3272
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3273

    
3274
  def DeclareLocks(self, level):
3275
    if level == locking.LEVEL_NODE:
3276
      self._LockInstancesNodes()
3277

    
3278
  def CheckPrereq(self):
3279
    """Check prerequisites.
3280

3281
    This checks that the instance is in the cluster.
3282

3283
    """
3284
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3285
    assert self.instance is not None, \
3286
      "Cannot retrieve locked instance %s" % self.op.instance_name
3287

    
3288
  def Exec(self, feedback_fn):
3289
    """Deactivate the disks
3290

3291
    """
3292
    instance = self.instance
3293
    _SafeShutdownInstanceDisks(self, instance)
3294

    
3295

    
3296
def _SafeShutdownInstanceDisks(lu, instance):
3297
  """Shutdown block devices of an instance.
3298

3299
  This function checks if an instance is running, before calling
3300
  _ShutdownInstanceDisks.
3301

3302
  """
3303
  pnode = instance.primary_node
3304
  ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
3305
  ins_l.Raise("Can't contact node %s" % pnode)
3306

    
3307
  if instance.name in ins_l.payload:
3308
    raise errors.OpExecError("Instance is running, can't shutdown"
3309
                             " block devices.")
3310

    
3311
  _ShutdownInstanceDisks(lu, instance)
3312

    
3313

    
3314
def _ShutdownInstanceDisks(lu, instance, ignore_primary=False):
3315
  """Shutdown block devices of an instance.
3316

3317
  This does the shutdown on all nodes of the instance.
3318

3319
  If the ignore_primary is false, errors on the primary node are
3320
  ignored.
3321

3322
  """
3323
  all_result = True
3324
  for disk in instance.disks:
3325
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
3326
      lu.cfg.SetDiskID(top_disk, node)
3327
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
3328
      msg = result.fail_msg
3329
      if msg:
3330
        lu.LogWarning("Could not shutdown block device %s on node %s: %s",
3331
                      disk.iv_name, node, msg)
3332
        if not ignore_primary or node != instance.primary_node:
3333
          all_result = False
3334
  return all_result
3335

    
3336

    
3337
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
3338
  """Checks if a node has enough free memory.
3339

3340
  This function check if a given node has the needed amount of free
3341
  memory. In case the node has less memory or we cannot get the
3342
  information from the node, this function raise an OpPrereqError
3343
  exception.
3344

3345
  @type lu: C{LogicalUnit}
3346
  @param lu: a logical unit from which we get configuration data
3347
  @type node: C{str}
3348
  @param node: the node to check
3349
  @type reason: C{str}
3350
  @param reason: string to use in the error message
3351
  @type requested: C{int}
3352
  @param requested: the amount of memory in MiB to check for
3353
  @type hypervisor_name: C{str}
3354
  @param hypervisor_name: the hypervisor to ask for memory stats
3355
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
3356
      we cannot check the node
3357

3358
  """
3359
  nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
3360
  nodeinfo[node].Raise("Can't get data from node %s" % node, prereq=True)
3361
  free_mem = nodeinfo[node].payload.get('memory_free', None)
3362
  if not isinstance(free_mem, int):
3363
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
3364
                               " was '%s'" % (node, free_mem))
3365
  if requested > free_mem:
3366
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
3367
                               " needed %s MiB, available %s MiB" %
3368
                               (node, reason, requested, free_mem))
3369

    
3370

    
3371
class LUStartupInstance(LogicalUnit):
3372
  """Starts an instance.
3373

3374
  """
3375
  HPATH = "instance-start"
3376
  HTYPE = constants.HTYPE_INSTANCE
3377
  _OP_REQP = ["instance_name", "force"]
3378
  REQ_BGL = False
3379

    
3380
  def ExpandNames(self):
3381
    self._ExpandAndLockInstance()
3382

    
3383
  def BuildHooksEnv(self):
3384
    """Build hooks env.
3385

3386
    This runs on master, primary and secondary nodes of the instance.
3387

3388
    """
3389
    env = {
3390
      "FORCE": self.op.force,
3391
      }
3392
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3393
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3394
    return env, nl, nl
3395

    
3396
  def CheckPrereq(self):
3397
    """Check prerequisites.
3398

3399
    This checks that the instance is in the cluster.
3400

3401
    """
3402
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3403
    assert self.instance is not None, \
3404
      "Cannot retrieve locked instance %s" % self.op.instance_name
3405

    
3406
    # extra beparams
3407
    self.beparams = getattr(self.op, "beparams", {})
3408
    if self.beparams:
3409
      if not isinstance(self.beparams, dict):
3410
        raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
3411
                                   " dict" % (type(self.beparams), ))
3412
      # fill the beparams dict
3413
      utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
3414
      self.op.beparams = self.beparams
3415

    
3416
    # extra hvparams
3417
    self.hvparams = getattr(self.op, "hvparams", {})
3418
    if self.hvparams:
3419
      if not isinstance(self.hvparams, dict):
3420
        raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
3421
                                   " dict" % (type(self.hvparams), ))
3422

    
3423
      # check hypervisor parameter syntax (locally)
3424
      cluster = self.cfg.GetClusterInfo()
3425
      utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
3426
      filled_hvp = objects.FillDict(cluster.hvparams[instance.hypervisor],
3427
                                    instance.hvparams)
3428
      filled_hvp.update(self.hvparams)
3429
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
3430
      hv_type.CheckParameterSyntax(filled_hvp)
3431
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
3432
      self.op.hvparams = self.hvparams
3433

    
3434
    _CheckNodeOnline(self, instance.primary_node)
3435

    
3436
    bep = self.cfg.GetClusterInfo().FillBE(instance)
3437
    # check bridges existence
3438
    _CheckInstanceBridgesExist(self, instance)
3439

    
3440
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3441
                                              instance.name,
3442
                                              instance.hypervisor)
3443
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3444
                      prereq=True)
3445
    if not remote_info.payload: # not running already
3446
      _CheckNodeFreeMemory(self, instance.primary_node,
3447
                           "starting instance %s" % instance.name,
3448
                           bep[constants.BE_MEMORY], instance.hypervisor)
3449

    
3450
  def Exec(self, feedback_fn):
3451
    """Start the instance.
3452

3453
    """
3454
    instance = self.instance
3455
    force = self.op.force
3456

    
3457
    self.cfg.MarkInstanceUp(instance.name)
3458

    
3459
    node_current = instance.primary_node
3460

    
3461
    _StartInstanceDisks(self, instance, force)
3462

    
3463
    result = self.rpc.call_instance_start(node_current, instance,
3464
                                          self.hvparams, self.beparams)
3465
    msg = result.fail_msg
3466
    if msg:
3467
      _ShutdownInstanceDisks(self, instance)
3468
      raise errors.OpExecError("Could not start instance: %s" % msg)
3469

    
3470

    
3471
class LURebootInstance(LogicalUnit):
3472
  """Reboot an instance.
3473

3474
  """
3475
  HPATH = "instance-reboot"
3476
  HTYPE = constants.HTYPE_INSTANCE
3477
  _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
3478
  REQ_BGL = False
3479

    
3480
  def ExpandNames(self):
3481
    if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
3482
                                   constants.INSTANCE_REBOOT_HARD,
3483
                                   constants.INSTANCE_REBOOT_FULL]:
3484
      raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
3485
                                  (constants.INSTANCE_REBOOT_SOFT,
3486
                                   constants.INSTANCE_REBOOT_HARD,
3487
                                   constants.INSTANCE_REBOOT_FULL))
3488
    self._ExpandAndLockInstance()
3489

    
3490
  def BuildHooksEnv(self):
3491
    """Build hooks env.
3492

3493
    This runs on master, primary and secondary nodes of the instance.
3494

3495
    """
3496
    env = {
3497
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
3498
      "REBOOT_TYPE": self.op.reboot_type,
3499
      }
3500
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
3501
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3502
    return env, nl, nl
3503

    
3504
  def CheckPrereq(self):
3505
    """Check prerequisites.
3506

3507
    This checks that the instance is in the cluster.
3508

3509
    """
3510
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3511
    assert self.instance is not None, \
3512
      "Cannot retrieve locked instance %s" % self.op.instance_name
3513

    
3514
    _CheckNodeOnline(self, instance.primary_node)
3515

    
3516
    # check bridges existence
3517
    _CheckInstanceBridgesExist(self, instance)
3518

    
3519
  def Exec(self, feedback_fn):
3520
    """Reboot the instance.
3521

3522
    """
3523
    instance = self.instance
3524
    ignore_secondaries = self.op.ignore_secondaries
3525
    reboot_type = self.op.reboot_type
3526

    
3527
    node_current = instance.primary_node
3528

    
3529
    if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
3530
                       constants.INSTANCE_REBOOT_HARD]:
3531
      for disk in instance.disks:
3532
        self.cfg.SetDiskID(disk, node_current)
3533
      result = self.rpc.call_instance_reboot(node_current, instance,
3534
                                             reboot_type)
3535
      result.Raise("Could not reboot instance")
3536
    else:
3537
      result = self.rpc.call_instance_shutdown(node_current, instance)
3538
      result.Raise("Could not shutdown instance for full reboot")
3539
      _ShutdownInstanceDisks(self, instance)
3540
      _StartInstanceDisks(self, instance, ignore_secondaries)
3541
      result = self.rpc.call_instance_start(node_current, instance, None, None)
3542
      msg = result.fail_msg
3543
      if msg:
3544
        _ShutdownInstanceDisks(self, instance)
3545
        raise errors.OpExecError("Could not start instance for"
3546
                                 " full reboot: %s" % msg)
3547

    
3548
    self.cfg.MarkInstanceUp(instance.name)
3549

    
3550

    
3551
class LUShutdownInstance(LogicalUnit):
3552
  """Shutdown an instance.
3553

3554
  """
3555
  HPATH = "instance-stop"
3556
  HTYPE = constants.HTYPE_INSTANCE
3557
  _OP_REQP = ["instance_name"]
3558
  REQ_BGL = False
3559

    
3560
  def ExpandNames(self):
3561
    self._ExpandAndLockInstance()
3562

    
3563
  def BuildHooksEnv(self):
3564
    """Build hooks env.
3565

3566
    This runs on master, primary and secondary nodes of the instance.
3567

3568
    """
3569
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3570
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3571
    return env, nl, nl
3572

    
3573
  def CheckPrereq(self):
3574
    """Check prerequisites.
3575

3576
    This checks that the instance is in the cluster.
3577

3578
    """
3579
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3580
    assert self.instance is not None, \
3581
      "Cannot retrieve locked instance %s" % self.op.instance_name
3582
    _CheckNodeOnline(self, self.instance.primary_node)
3583

    
3584
  def Exec(self, feedback_fn):
3585
    """Shutdown the instance.
3586

3587
    """
3588
    instance = self.instance
3589
    node_current = instance.primary_node
3590
    self.cfg.MarkInstanceDown(instance.name)
3591
    result = self.rpc.call_instance_shutdown(node_current, instance)
3592
    msg = result.fail_msg
3593
    if msg:
3594
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)
3595

    
3596
    _ShutdownInstanceDisks(self, instance)
3597

    
3598

    
3599
class LUReinstallInstance(LogicalUnit):
3600
  """Reinstall an instance.
3601

3602
  """
3603
  HPATH = "instance-reinstall"
3604
  HTYPE = constants.HTYPE_INSTANCE
3605
  _OP_REQP = ["instance_name"]
3606
  REQ_BGL = False
3607

    
3608
  def ExpandNames(self):
3609
    self._ExpandAndLockInstance()
3610

    
3611
  def BuildHooksEnv(self):
3612
    """Build hooks env.
3613

3614
    This runs on master, primary and secondary nodes of the instance.
3615

3616
    """
3617
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3618
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3619
    return env, nl, nl
3620

    
3621
  def CheckPrereq(self):
3622
    """Check prerequisites.
3623

3624
    This checks that the instance is in the cluster and is not running.
3625

3626
    """
3627
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3628
    assert instance is not None, \
3629
      "Cannot retrieve locked instance %s" % self.op.instance_name
3630
    _CheckNodeOnline(self, instance.primary_node)
3631

    
3632
    if instance.disk_template == constants.DT_DISKLESS:
3633
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3634
                                 self.op.instance_name)
3635
    if instance.admin_up:
3636
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3637
                                 self.op.instance_name)
3638
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3639
                                              instance.name,
3640
                                              instance.hypervisor)
3641
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3642
                      prereq=True)
3643
    if remote_info.payload:
3644
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3645
                                 (self.op.instance_name,
3646
                                  instance.primary_node))
3647

    
3648
    self.op.os_type = getattr(self.op, "os_type", None)
3649
    if self.op.os_type is not None:
3650
      # OS verification
3651
      pnode = self.cfg.GetNodeInfo(
3652
        self.cfg.ExpandNodeName(instance.primary_node))
3653
      if pnode is None:
3654
        raise errors.OpPrereqError("Primary node '%s' is unknown" %
3655
                                   self.op.pnode)
3656
      result = self.rpc.call_os_get(pnode.name, self.op.os_type)
3657
      result.Raise("OS '%s' not in supported OS list for primary node %s" %
3658
                   (self.op.os_type, pnode.name), prereq=True)
3659

    
3660
    self.instance = instance
3661

    
3662
  def Exec(self, feedback_fn):
3663
    """Reinstall the instance.
3664

3665
    """
3666
    inst = self.instance
3667

    
3668
    if self.op.os_type is not None:
3669
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
3670
      inst.os = self.op.os_type
3671
      self.cfg.Update(inst)
3672

    
3673
    _StartInstanceDisks(self, inst, None)
3674
    try:
3675
      feedback_fn("Running the instance OS create scripts...")
3676
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True)
3677
      result.Raise("Could not install OS for instance %s on node %s" %
3678
                   (inst.name, inst.primary_node))
3679
    finally:
3680
      _ShutdownInstanceDisks(self, inst)
3681

    
3682

    
3683
class LURecreateInstanceDisks(LogicalUnit):
3684
  """Recreate an instance's missing disks.
3685

3686
  """
3687
  HPATH = "instance-recreate-disks"
3688
  HTYPE = constants.HTYPE_INSTANCE
3689
  _OP_REQP = ["instance_name", "disks"]
3690
  REQ_BGL = False
3691

    
3692
  def CheckArguments(self):
3693
    """Check the arguments.
3694

3695
    """
3696
    if not isinstance(self.op.disks, list):
3697
      raise errors.OpPrereqError("Invalid disks parameter")
3698
    for item in self.op.disks:
3699
      if (not isinstance(item, int) or
3700
          item < 0):
3701
        raise errors.OpPrereqError("Invalid disk specification '%s'" %
3702
                                   str(item))
3703

    
3704
  def ExpandNames(self):
3705
    self._ExpandAndLockInstance()
3706

    
3707
  def BuildHooksEnv(self):
3708
    """Build hooks env.
3709

3710
    This runs on master, primary and secondary nodes of the instance.
3711

3712
    """
3713
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3714
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3715
    return env, nl, nl
3716

    
3717
  def CheckPrereq(self):
3718
    """Check prerequisites.
3719

3720
    This checks that the instance is in the cluster and is not running.
3721

3722
    """
3723
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3724
    assert instance is not None, \
3725
      "Cannot retrieve locked instance %s" % self.op.instance_name
3726
    _CheckNodeOnline(self, instance.primary_node)
3727

    
3728
    if instance.disk_template == constants.DT_DISKLESS:
3729
      raise errors.OpPrereqError("Instance '%s' has no disks" %
3730
                                 self.op.instance_name)
3731
    if instance.admin_up:
3732
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3733
                                 self.op.instance_name)
3734
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3735
                                              instance.name,
3736
                                              instance.hypervisor)
3737
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3738
                      prereq=True)
3739
    if remote_info.payload:
3740
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3741
                                 (self.op.instance_name,
3742
                                  instance.primary_node))
3743

    
3744
    if not self.op.disks:
3745
      self.op.disks = range(len(instance.disks))
3746
    else:
3747
      for idx in self.op.disks:
3748
        if idx >= len(instance.disks):
3749
          raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx)
3750

    
3751
    self.instance = instance
3752

    
3753
  def Exec(self, feedback_fn):
3754
    """Recreate the disks.
3755

3756
    """
3757
    to_skip = []
3758
    for idx, disk in enumerate(self.instance.disks):
3759
      if idx not in self.op.disks: # disk idx has not been passed in
3760
        to_skip.append(idx)
3761
        continue
3762

    
3763
    _CreateDisks(self, self.instance, to_skip=to_skip)
3764

    
3765

    
3766
class LURenameInstance(LogicalUnit):
3767
  """Rename an instance.
3768

3769
  """
3770
  HPATH = "instance-rename"
3771
  HTYPE = constants.HTYPE_INSTANCE
3772
  _OP_REQP = ["instance_name", "new_name"]
3773

    
3774
  def BuildHooksEnv(self):
3775
    """Build hooks env.
3776

3777
    This runs on master, primary and secondary nodes of the instance.
3778

3779
    """
3780
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3781
    env["INSTANCE_NEW_NAME"] = self.op.new_name
3782
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
3783
    return env, nl, nl
3784

    
3785
  def CheckPrereq(self):
3786
    """Check prerequisites.
3787

3788
    This checks that the instance is in the cluster and is not running.
3789

3790
    """
3791
    instance = self.cfg.GetInstanceInfo(
3792
      self.cfg.ExpandInstanceName(self.op.instance_name))
3793
    if instance is None:
3794
      raise errors.OpPrereqError("Instance '%s' not known" %
3795
                                 self.op.instance_name)
3796
    _CheckNodeOnline(self, instance.primary_node)
3797

    
3798
    if instance.admin_up:
3799
      raise errors.OpPrereqError("Instance '%s' is marked to be up" %
3800
                                 self.op.instance_name)
3801
    remote_info = self.rpc.call_instance_info(instance.primary_node,
3802
                                              instance.name,
3803
                                              instance.hypervisor)
3804
    remote_info.Raise("Error checking node %s" % instance.primary_node,
3805
                      prereq=True)
3806
    if remote_info.payload:
3807
      raise errors.OpPrereqError("Instance '%s' is running on the node %s" %
3808
                                 (self.op.instance_name,
3809
                                  instance.primary_node))
3810
    self.instance = instance
3811

    
3812
    # new name verification
3813
    name_info = utils.HostInfo(self.op.new_name)
3814

    
3815
    self.op.new_name = new_name = name_info.name
3816
    instance_list = self.cfg.GetInstanceList()
3817
    if new_name in instance_list:
3818
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
3819
                                 new_name)
3820

    
3821
    if not getattr(self.op, "ignore_ip", False):
3822
      if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
3823
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
3824
                                   (name_info.ip, new_name))
3825

    
3826

    
3827
  def Exec(self, feedback_fn):
3828
    """Reinstall the instance.
3829

3830
    """
3831
    inst = self.instance
3832
    old_name = inst.name
3833

    
3834
    if inst.disk_template == constants.DT_FILE:
3835
      old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3836

    
3837
    self.cfg.RenameInstance(inst.name, self.op.new_name)
3838
    # Change the instance lock. This is definitely safe while we hold the BGL
3839
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
3840
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
3841

    
3842
    # re-read the instance from the configuration after rename
3843
    inst = self.cfg.GetInstanceInfo(self.op.new_name)
3844

    
3845
    if inst.disk_template == constants.DT_FILE:
3846
      new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
3847
      result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
3848
                                                     old_file_storage_dir,
3849
                                                     new_file_storage_dir)
3850
      result.Raise("Could not rename on node %s directory '%s' to '%s'"
3851
                   " (but the instance has been renamed in Ganeti)" %
3852
                   (inst.primary_node, old_file_storage_dir,
3853
                    new_file_storage_dir))
3854

    
3855
    _StartInstanceDisks(self, inst, None)
3856
    try:
3857
      result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
3858
                                                 old_name)
3859
      msg = result.fail_msg
3860
      if msg:
3861
        msg = ("Could not run OS rename script for instance %s on node %s"
3862
               " (but the instance has been renamed in Ganeti): %s" %
3863
               (inst.name, inst.primary_node, msg))
3864
        self.proc.LogWarning(msg)
3865
    finally:
3866
      _ShutdownInstanceDisks(self, inst)
3867

    
3868

    
3869
class LURemoveInstance(LogicalUnit):
3870
  """Remove an instance.
3871

3872
  """
3873
  HPATH = "instance-remove"
3874
  HTYPE = constants.HTYPE_INSTANCE
3875
  _OP_REQP = ["instance_name", "ignore_failures"]
3876
  REQ_BGL = False
3877

    
3878
  def ExpandNames(self):
3879
    self._ExpandAndLockInstance()
3880
    self.needed_locks[locking.LEVEL_NODE] = []
3881
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3882

    
3883
  def DeclareLocks(self, level):
3884
    if level == locking.LEVEL_NODE:
3885
      self._LockInstancesNodes()
3886

    
3887
  def BuildHooksEnv(self):
3888
    """Build hooks env.
3889

3890
    This runs on master, primary and secondary nodes of the instance.
3891

3892
    """
3893
    env = _BuildInstanceHookEnvByObject(self, self.instance)
3894
    nl = [self.cfg.GetMasterNode()]
3895
    return env, nl, nl
3896

    
3897
  def CheckPrereq(self):
3898
    """Check prerequisites.
3899

3900
    This checks that the instance is in the cluster.
3901

3902
    """
3903
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3904
    assert self.instance is not None, \
3905
      "Cannot retrieve locked instance %s" % self.op.instance_name
3906

    
3907
  def Exec(self, feedback_fn):
3908
    """Remove the instance.
3909

3910
    """
3911
    instance = self.instance
3912
    logging.info("Shutting down instance %s on node %s",
3913
                 instance.name, instance.primary_node)
3914

    
3915
    result = self.rpc.call_instance_shutdown(instance.primary_node, instance)
3916
    msg = result.fail_msg
3917
    if msg:
3918
      if self.op.ignore_failures:
3919
        feedback_fn("Warning: can't shutdown instance: %s" % msg)
3920
      else:
3921
        raise errors.OpExecError("Could not shutdown instance %s on"
3922
                                 " node %s: %s" %
3923
                                 (instance.name, instance.primary_node, msg))
3924

    
3925
    logging.info("Removing block devices for instance %s", instance.name)
3926

    
3927
    if not _RemoveDisks(self, instance):
3928
      if self.op.ignore_failures:
3929
        feedback_fn("Warning: can't remove instance's disks")
3930
      else:
3931
        raise errors.OpExecError("Can't remove instance's disks")
3932

    
3933
    logging.info("Removing instance %s out of cluster config", instance.name)
3934

    
3935
    self.cfg.RemoveInstance(instance.name)
3936
    self.remove_locks[locking.LEVEL_INSTANCE] = instance.name
3937

    
3938

    
3939
class LUQueryInstances(NoHooksLU):
3940
  """Logical unit for querying instances.
3941

3942
  """
3943
  _OP_REQP = ["output_fields", "names", "use_locking"]
3944
  REQ_BGL = False
3945
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
3946
                                    "admin_state",
3947
                                    "disk_template", "ip", "mac", "bridge",
3948
                                    "nic_mode", "nic_link",
3949
                                    "sda_size", "sdb_size", "vcpus", "tags",
3950
                                    "network_port", "beparams",
3951
                                    r"(disk)\.(size)/([0-9]+)",
3952
                                    r"(disk)\.(sizes)", "disk_usage",
3953
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
3954
                                    r"(nic)\.(bridge)/([0-9]+)",
3955
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
3956
                                    r"(disk|nic)\.(count)",
3957
                                    "serial_no", "hypervisor", "hvparams",
3958
                                    "ctime", "mtime",
3959
                                    ] +
3960
                                  ["hv/%s" % name
3961
                                   for name in constants.HVS_PARAMETERS] +
3962
                                  ["be/%s" % name
3963
                                   for name in constants.BES_PARAMETERS])
3964
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
3965

    
3966

    
3967
  def ExpandNames(self):
3968
    _CheckOutputFields(static=self._FIELDS_STATIC,
3969
                       dynamic=self._FIELDS_DYNAMIC,
3970
                       selected=self.op.output_fields)
3971

    
3972
    self.needed_locks = {}
3973
    self.share_locks[locking.LEVEL_INSTANCE] = 1
3974
    self.share_locks[locking.LEVEL_NODE] = 1
3975

    
3976
    if self.op.names:
3977
      self.wanted = _GetWantedInstances(self, self.op.names)
3978
    else:
3979
      self.wanted = locking.ALL_SET
3980

    
3981
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3982
    self.do_locking = self.do_node_query and self.op.use_locking
3983
    if self.do_locking:
3984
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
3985
      self.needed_locks[locking.LEVEL_NODE] = []
3986
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3987

    
3988
  def DeclareLocks(self, level):
3989
    if level == locking.LEVEL_NODE and self.do_locking:
3990
      self._LockInstancesNodes()
3991

    
3992
  def CheckPrereq(self):
3993
    """Check prerequisites.
3994

3995
    """
3996
    pass
3997

    
3998
  def Exec(self, feedback_fn):
3999
    """Computes the list of nodes and their attributes.
4000

4001
    """
4002
    all_info = self.cfg.GetAllInstancesInfo()
4003
    if self.wanted == locking.ALL_SET:
4004
      # caller didn't specify instance names, so ordering is not important
4005
      if self.do_locking:
4006
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4007
      else:
4008
        instance_names = all_info.keys()
4009
      instance_names = utils.NiceSort(instance_names)
4010
    else:
4011
      # caller did specify names, so we must keep the ordering
4012
      if self.do_locking:
4013
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4014
      else:
4015
        tgt_set = all_info.keys()
4016
      missing = set(self.wanted).difference(tgt_set)
4017
      if missing:
4018
        raise errors.OpExecError("Some instances were removed before"
4019
                                 " retrieving their data: %s" % missing)
4020
      instance_names = self.wanted
4021

    
4022
    instance_list = [all_info[iname] for iname in instance_names]
4023

    
4024
    # begin data gathering
4025

    
4026
    nodes = frozenset([inst.primary_node for inst in instance_list])
4027
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
4028

    
4029
    bad_nodes = []
4030
    off_nodes = []
4031
    if self.do_node_query:
4032
      live_data = {}
4033
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4034
      for name in nodes:
4035
        result = node_data[name]
4036
        if result.offline:
4037
          # offline nodes will be in both lists
4038
          off_nodes.append(name)
4039
        if result.failed or result.fail_msg:
4040
          bad_nodes.append(name)
4041
        else:
4042
          if result.payload:
4043
            live_data.update(result.payload)
4044
          # else no instance is alive
4045
    else:
4046
      live_data = dict([(name, {}) for name in instance_names])
4047

    
4048
    # end data gathering
4049

    
4050
    HVPREFIX = "hv/"
4051
    BEPREFIX = "be/"
4052
    output = []
4053
    cluster = self.cfg.GetClusterInfo()
4054
    for instance in instance_list:
4055
      iout = []
4056
      i_hv = cluster.FillHV(instance)
4057
      i_be = cluster.FillBE(instance)
4058
      i_nicp = [objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
4059
                                 nic.nicparams) for nic in instance.nics]
4060
      for field in self.op.output_fields:
4061
        st_match = self._FIELDS_STATIC.Matches(field)
4062
        if field == "name":
4063
          val = instance.name
4064
        elif field == "os":
4065
          val = instance.os
4066
        elif field == "pnode":
4067
          val = instance.primary_node
4068
        elif field == "snodes":
4069
          val = list(instance.secondary_nodes)
4070
        elif field == "admin_state":
4071
          val = instance.admin_up
4072
        elif field == "oper_state":
4073
          if instance.primary_node in bad_nodes:
4074
            val = None
4075
          else:
4076
            val = bool(live_data.get(instance.name))
4077
        elif field == "status":
4078
          if instance.primary_node in off_nodes:
4079
            val = "ERROR_nodeoffline"
4080
          elif instance.primary_node in bad_nodes:
4081
            val = "ERROR_nodedown"
4082
          else:
4083
            running = bool(live_data.get(instance.name))
4084
            if running:
4085
              if instance.admin_up:
4086
                val = "running"
4087
              else:
4088
                val = "ERROR_up"
4089
            else:
4090
              if instance.admin_up:
4091
                val = "ERROR_down"
4092
              else:
4093
                val = "ADMIN_down"
4094
        elif field == "oper_ram":
4095
          if instance.primary_node in bad_nodes:
4096
            val = None
4097
          elif instance.name in live_data:
4098
            val = live_data[instance.name].get("memory", "?")
4099
          else:
4100
            val = "-"
4101
        elif field == "vcpus":
4102
          val = i_be[constants.BE_VCPUS]
4103
        elif field == "disk_template":
4104
          val = instance.disk_template
4105
        elif field == "ip":
4106
          if instance.nics:
4107
            val = instance.nics[0].ip
4108
          else:
4109
            val = None
4110
        elif field == "nic_mode":
4111
          if instance.nics:
4112
            val = i_nicp[0][constants.NIC_MODE]
4113
          else:
4114
            val = None
4115
        elif field == "nic_link":
4116
          if instance.nics:
4117
            val = i_nicp[0][constants.NIC_LINK]
4118
          else:
4119
            val = None
4120
        elif field == "bridge":
4121
          if (instance.nics and
4122
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4123
            val = i_nicp[0][constants.NIC_LINK]
4124
          else:
4125
            val = None
4126
        elif field == "mac":
4127
          if instance.nics:
4128
            val = instance.nics[0].mac
4129
          else:
4130
            val = None
4131
        elif field == "sda_size" or field == "sdb_size":
4132
          idx = ord(field[2]) - ord('a')
4133
          try:
4134
            val = instance.FindDisk(idx).size
4135
          except errors.OpPrereqError:
4136
            val = None
4137
        elif field == "disk_usage": # total disk usage per node
4138
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
4139
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4140
        elif field == "tags":
4141
          val = list(instance.GetTags())
4142
        elif field == "serial_no":
4143
          val = instance.serial_no
4144
        elif field == "ctime":
4145
          val = instance.ctime
4146
        elif field == "mtime":
4147
          val = instance.mtime
4148
        elif field == "network_port":
4149
          val = instance.network_port
4150
        elif field == "hypervisor":
4151
          val = instance.hypervisor
4152
        elif field == "hvparams":
4153
          val = i_hv
4154
        elif (field.startswith(HVPREFIX) and
4155
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS):
4156
          val = i_hv.get(field[len(HVPREFIX):], None)
4157
        elif field == "beparams":
4158
          val = i_be
4159
        elif (field.startswith(BEPREFIX) and
4160
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4161
          val = i_be.get(field[len(BEPREFIX):], None)
4162
        elif st_match and st_match.groups():
4163
          # matches a variable list
4164
          st_groups = st_match.groups()
4165
          if st_groups and st_groups[0] == "disk":
4166
            if st_groups[1] == "count":
4167
              val = len(instance.disks)
4168
            elif st_groups[1] == "sizes":
4169
              val = [disk.size for disk in instance.disks]
4170
            elif st_groups[1] == "size":
4171
              try:
4172
                val = instance.FindDisk(st_groups[2]).size
4173
              except errors.OpPrereqError:
4174
                val = None
4175
            else:
4176
              assert False, "Unhandled disk parameter"
4177
          elif st_groups[0] == "nic":
4178
            if st_groups[1] == "count":
4179
              val = len(instance.nics)
4180
            elif st_groups[1] == "macs":
4181
              val = [nic.mac for nic in instance.nics]
4182
            elif st_groups[1] == "ips":
4183
              val = [nic.ip for nic in instance.nics]
4184
            elif st_groups[1] == "modes":
4185
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
4186
            elif st_groups[1] == "links":
4187
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
4188
            elif st_groups[1] == "bridges":
4189
              val = []
4190
              for nicp in i_nicp:
4191
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
4192
                  val.append(nicp[constants.NIC_LINK])
4193
                else:
4194
                  val.append(None)
4195
            else:
4196
              # index-based item
4197
              nic_idx = int(st_groups[2])
4198
              if nic_idx >= len(instance.nics):
4199
                val = None
4200
              else:
4201
                if st_groups[1] == "mac":
4202
                  val = instance.nics[nic_idx].mac
4203
                elif st_groups[1] == "ip":
4204
                  val = instance.nics[nic_idx].ip
4205
                elif st_groups[1] == "mode":
4206
                  val = i_nicp[nic_idx][constants.NIC_MODE]
4207
                elif st_groups[1] == "link":
4208
                  val = i_nicp[nic_idx][constants.NIC_LINK]
4209
                elif st_groups[1] == "bridge":
4210
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
4211
                  if nic_mode == constants.NIC_MODE_BRIDGED:
4212
                    val = i_nicp[nic_idx][constants.NIC_LINK]
4213
                  else:
4214
                    val = None
4215
                else:
4216
                  assert False, "Unhandled NIC parameter"
4217
          else:
4218
            assert False, ("Declared but unhandled variable parameter '%s'" %
4219
                           field)
4220
        else:
4221
          assert False, "Declared but unhandled parameter '%s'" % field
4222
        iout.append(val)
4223
      output.append(iout)
4224

    
4225
    return output
4226

    
4227

    
4228
class LUFailoverInstance(LogicalUnit):
4229
  """Failover an instance.
4230

4231
  """
4232
  HPATH = "instance-failover"
4233
  HTYPE = constants.HTYPE_INSTANCE
4234
  _OP_REQP = ["instance_name", "ignore_consistency"]
4235
  REQ_BGL = False
4236

    
4237
  def ExpandNames(self):
4238
    self._ExpandAndLockInstance()
4239
    self.needed_locks[locking.LEVEL_NODE] = []
4240
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4241

    
4242
  def DeclareLocks(self, level):
4243
    if level == locking.LEVEL_NODE:
4244
      self._LockInstancesNodes()
4245

    
4246
  def BuildHooksEnv(self):
4247
    """Build hooks env.
4248

4249
    This runs on master, primary and secondary nodes of the instance.
4250

4251
    """
4252
    env = {
4253
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
4254
      }
4255
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4256
    nl = [self.cfg.GetMasterNode()] + list(self.instance.secondary_nodes)
4257
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    bep = self.cfg.GetClusterInfo().FillBE(instance)
    if instance.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    target_node = secondary_nodes[0]
    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    if instance.admin_up:
      # check memory requirements on the secondary node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = instance.secondary_nodes[0]

    feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      # for drbd, these are drbd over lvm
      if not _CheckDiskConsistency(self, dev, target_node, False):
        if instance.admin_up and not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % dev.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    feedback_fn("* deactivating the instance's disks on source node")
    if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
      raise errors.OpExecError("Can't shut down the instance's disks.")

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      feedback_fn("* activating the instance's disks on target node")
      logging.info("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      feedback_fn("* starting the instance on the target node")
      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
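
  # Rough sketch of the flow implemented above, with a hypothetical invocation
  # for illustration only (exact opcode fields may differ): submitting an
  # OpFailoverInstance(instance_name="inst1.example.com",
  # ignore_consistency=False) leads to a consistency check on the secondary,
  # shutdown and disk deactivation on the primary, a config update flipping
  # primary_node, then disk activation and start on the old secondary.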


class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "live", "cleanup"]

  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = TLMigrateInstance(self, self.op.instance_name,
                                       self.op.live, self.op.cleanup)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    instance = self._migrater.instance
    env = _BuildInstanceHookEnvByObject(self, instance)
    env["MIGRATE_LIVE"] = self.op.live
    env["MIGRATE_CLEANUP"] = self.op.cleanup
    nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
    return env, nl, nl
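
  # Note: this LU intentionally defines no CheckPrereq/Exec of its own; the
  # actual work is delegated to the TLMigrateInstance tasklet registered in
  # ExpandNames via self.tasklets.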


class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "target_node"]
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()
    target_node = self.cfg.ExpandNodeName(self.op.target_node)
    if target_node is None:
      raise errors.OpPrereqError("Node '%s' not known" %
                                 self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node))

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    instance.primary_node = target_node
    self.cfg.Update(instance)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
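
  # Sketch of the copy loop above for a single plain-LVM disk (hypothetical
  # names, for illustration only):
  #   call_blockdev_assemble(target, disk, "inst1.example.com", True)
  #     -> payload is the assembled device path, e.g. "/dev/xenvg/<uuid>.disk0"
  #   call_blockdev_export(source, disk, target, dev_path, cluster_name)
  # Any failure is collected in `errs` and triggers a revert via _RemoveDisks
  # on the target node before the operation aborts.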


class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_REQP = ["node_name", "live"]
  REQ_BGL = False

  def ExpandNames(self):
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
    if self.op.node_name is None:
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)

    self.needed_locks = {
      locking.LEVEL_NODE: [self.op.node_name],
      }

    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))

    self.tasklets = tasklets

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }

    nl = [self.cfg.GetMasterNode()]

    return (env, nl, nl)
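
  # Illustration (hypothetical): on a node whose primary instances are "a" and
  # "b", ExpandNames above builds
  #   self.tasklets = [TLMigrateInstance(self, "a", live, False),
  #                    TLMigrateInstance(self, "b", live, False)]
  # so every instance is migrated by its own tasklet under the same node lock.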


class TLMigrateInstance(Tasklet):
  def __init__(self, lu, instance_name, live, cleanup):
    """Initializes this class.

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.live = live
    self.cleanup = cleanup

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(
      self.cfg.ExpandInstanceName(self.instance_name))
    if instance is None:
      raise errors.OpPrereqError("Instance '%s' not known" %
                                 self.instance_name)

    if instance.disk_template != constants.DT_DRBD8:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " drbd8, cannot migrate.")

    secondary_nodes = instance.secondary_nodes
    if not secondary_nodes:
      raise errors.ConfigurationError("No secondary node but using"
                                      " drbd8 disk template")

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    target_node = secondary_nodes[0]
    # check memory requirements on the secondary node
    _CheckNodeFreeMemory(self, target_node, "migrating instance %s" %
                         instance.name, i_be[constants.BE_MEMORY],
                         instance.hypervisor)

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self, target_node)
      result = self.rpc.call_instance_migratable(instance.primary_node,
                                                 instance)
      result.Raise("Can't migrate, please use failover", prereq=True)

    self.instance = instance

  def _WaitUntilSync(self):
    """Poll with custom rpc for disk sync.

    This uses our own step-based rpc call.

    """
    self.feedback_fn("* wait until resync is done")
    all_done = False
    while not all_done:
      all_done = True
      result = self.rpc.call_drbd_wait_sync(self.all_nodes,
                                            self.nodes_ip,
                                            self.instance.disks)
      min_percent = 100
      for node, nres in result.items():
        nres.Raise("Cannot resync disks on node %s" % node)
        node_done, node_percent = nres.payload
        all_done = all_done and node_done
        if node_percent is not None:
          min_percent = min(min_percent, node_percent)
      if not all_done:
        if min_percent < 100:
          self.feedback_fn("   - progress: %.1f%%" % min_percent)
        time.sleep(2)

  def _EnsureSecondary(self, node):
    """Demote a node to secondary.

    """
    self.feedback_fn("* switching node %s to secondary mode" % node)

    for dev in self.instance.disks:
      self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_close(node, self.instance.name,
                                          self.instance.disks)
    result.Raise("Cannot change disk to secondary on node %s" % node)

  def _GoStandalone(self):
    """Disconnect from the network.

    """
    self.feedback_fn("* changing into standalone mode")
    result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
                                               self.instance.disks)
    for node, nres in result.items():
      nres.Raise("Cannot disconnect disks node %s" % node)

  def _GoReconnect(self, multimaster):
    """Reconnect to the network.

    """
    if multimaster:
      msg = "dual-master"
    else:
      msg = "single-master"
    self.feedback_fn("* changing disks into %s mode" % msg)
    result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
                                           self.instance.disks,
                                           self.instance.name, multimaster)
    for node, nres in result.items():
      nres.Raise("Cannot change disks config on node %s" % node)

  def _ExecCleanup(self):
    """Try to cleanup after a failed migration.

    The cleanup is done by:
      - check that the instance is running only on one node
        (and update the config if needed)
      - change disks on its secondary node to secondary
      - wait until disks are fully synchronized
      - disconnect from the network
      - change disks into single-master mode
      - wait again until disks are fully synchronized

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    # check running on only one node
    self.feedback_fn("* checking where the instance actually runs"
                     " (if this hangs, the hypervisor might be in"
                     " a bad state)")
    ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
    for node, result in ins_l.items():
      result.Raise("Can't contact node %s" % node)

    runningon_source = instance.name in ins_l[source_node].payload
    runningon_target = instance.name in ins_l[target_node].payload

    if runningon_source and runningon_target:
      raise errors.OpExecError("Instance seems to be running on two nodes,"
                               " or the hypervisor is confused. You will have"
                               " to ensure manually that it runs only on one"
                               " and restart this operation.")

    if not (runningon_source or runningon_target):
      raise errors.OpExecError("Instance does not seem to be running at all."
                               " In this case, it's safer to repair by"
                               " running 'gnt-instance stop' to ensure disk"
                               " shutdown, and then restarting it.")

    if runningon_target:
      # the migration has actually succeeded, we need to update the config
      self.feedback_fn("* instance running on secondary node (%s),"
                       " updating config" % target_node)
      instance.primary_node = target_node
      self.cfg.Update(instance)
      demoted_node = source_node
    else:
      self.feedback_fn("* instance confirmed to be running on its"
                       " primary node (%s)" % source_node)
      demoted_node = target_node

    self._EnsureSecondary(demoted_node)
    try:
      self._WaitUntilSync()
    except errors.OpExecError:
      # we ignore here errors, since if the device is standalone, it
      # won't be able to sync
      pass
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def _RevertDiskStatus(self):
    """Try to revert the disk status after a failed migration.

    """
    target_node = self.target_node
    try:
      self._EnsureSecondary(target_node)
      self._GoStandalone()
      self._GoReconnect(False)
      self._WaitUntilSync()
    except errors.OpExecError, err:
      self.lu.LogWarning("Migration failed and I can't reconnect the"
                         " drives: error '%s'\n"
                         "Please look and recover the instance status" %
                         str(err))

  def _AbortMigration(self):
    """Call the hypervisor code to abort a started migration.

    """
    instance = self.instance
    target_node = self.target_node
    migration_info = self.migration_info

    abort_result = self.rpc.call_finalize_migration(target_node,
                                                    instance,
                                                    migration_info,
                                                    False)
    abort_msg = abort_result.fail_msg
    if abort_msg:
      logging.error("Aborting migration failed on target node %s: %s" %
                    (target_node, abort_msg))
      # Don't raise an exception here, as we still have to try to revert the
      # disk status, even if this step failed.

  def _ExecMigration(self):
    """Migrate an instance.

    The migrate is done by:
      - change the disks into dual-master mode
      - wait until disks are fully synchronized again
      - migrate the instance
      - change disks on the new secondary node (the old primary) to secondary
      - wait until disks are fully synchronized
      - change disks into single-master mode

    """
    instance = self.instance
    target_node = self.target_node
    source_node = self.source_node

    self.feedback_fn("* checking disk consistency between source and target")
    for dev in instance.disks:
      if not _CheckDiskConsistency(self, dev, target_node, False):
        raise errors.OpExecError("Disk %s is degraded or not fully"
                                 " synchronized on target node,"
                                 " aborting migrate." % dev.iv_name)

    # First get the migration information from the remote node
    result = self.rpc.call_migration_info(source_node, instance)
    msg = result.fail_msg
    if msg:
      log_err = ("Failed fetching source migration information from %s: %s" %
                 (source_node, msg))
      logging.error(log_err)
      raise errors.OpExecError(log_err)

    self.migration_info = migration_info = result.payload

    # Then switch the disks to master/master mode
    self._EnsureSecondary(target_node)
    self._GoStandalone()
    self._GoReconnect(True)
    self._WaitUntilSync()

    self.feedback_fn("* preparing %s to accept the instance" % target_node)
    result = self.rpc.call_accept_instance(target_node,
                                           instance,
                                           migration_info,
                                           self.nodes_ip[target_node])

    msg = result.fail_msg
    if msg:
      logging.error("Instance pre-migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
                               (instance.name, msg))

    self.feedback_fn("* migrating instance to %s" % target_node)
    result = self.rpc.call_instance_migrate(source_node, instance,
                                            self.nodes_ip[target_node],
                                            self.live)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration failed, trying to revert"
                    " disk status: %s", msg)
      self._AbortMigration()
      self._RevertDiskStatus()
      raise errors.OpExecError("Could not migrate instance %s: %s" %
                               (instance.name, msg))

    instance.primary_node = target_node
    # distribute new instance config to the other nodes
    self.cfg.Update(instance)

    result = self.rpc.call_finalize_migration(target_node,
                                              instance,
                                              migration_info,
                                              True)
    msg = result.fail_msg
    if msg:
      logging.error("Instance migration succeeded, but finalization failed:"
                    " %s" % msg)
      raise errors.OpExecError("Could not finalize instance migration: %s" %
                               msg)

    self._EnsureSecondary(source_node)
    self._WaitUntilSync()
    self._GoStandalone()
    self._GoReconnect(False)
    self._WaitUntilSync()

    self.feedback_fn("* done")

  def Exec(self, feedback_fn):
    """Perform the migration.

    """
    feedback_fn("Migrating instance %s" % self.instance.name)

    self.feedback_fn = feedback_fn

    self.source_node = self.instance.primary_node
    self.target_node = self.instance.secondary_nodes[0]
    self.all_nodes = [self.source_node, self.target_node]
    self.nodes_ip = {
      self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
      self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
      }

    if self.cleanup:
      return self._ExecCleanup()
    else:
      return self._ExecMigration()
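
    # Illustrative shape of the state set up above (hypothetical values):
    #   self.all_nodes = ["node1.example.com", "node2.example.com"]
    #   self.nodes_ip  = {"node1.example.com": "192.0.2.10",
    #                     "node2.example.com": "192.0.2.11"}
    # i.e. the secondary IPs are what the DRBD rpc helpers above receive.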


def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a tree of block devices on a given node.

  If this device type has to be created on secondaries, create it and
  all its children.

  If not, just recurse to children keeping the same 'force' value.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      will be changed to True whenever we find a device which has
      CreateOnSecondary() attribute
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  if device.CreateOnSecondary():
    force_create = True

  if device.children:
    for child in device.children:
      _CreateBlockDev(lu, node, instance, child, force_create,
                      info, force_open)

  if not force_create:
    return

  _CreateSingleBlockDev(lu, node, instance, device, info, force_open)


def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create a single block device on a given node.

  This will not recurse over children of the device, so they must be
  created in advance.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device's own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  result = lu.rpc.call_blockdev_create(node, device, device.size,
                                       instance.name, force_open, info)
  result.Raise("Can't create block device %s on"
               " node %s for instance %s" % (device, node, instance.name))
  if device.physical_id is None:
    device.physical_id = result.payload


def _GenerateUniqueNames(lu, exts):
  """Generate a suitable LV name.

  This will generate a logical volume name for the given instance.

  """
  results = []
  for val in exts:
    new_id = lu.cfg.GenerateUniqueID()
    results.append("%s%s" % (new_id, val))
  return results


def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Generate a drbd8 device complete with its children.

  """
  port = lu.cfg.AllocatePort()
  vgname = lu.cfg.GetVGName()
  shared_secret = lu.cfg.GenerateDRBDSecret()
  dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
                          logical_id=(vgname, names[0]))
  dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
                          logical_id=(vgname, names[1]))
  drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                          logical_id=(primary, secondary, port,
                                      p_minor, s_minor,
                                      shared_secret),
                          children=[dev_data, dev_meta],
                          iv_name=iv_name)
  return drbd_dev
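
  # Illustrative result (hypothetical names): for size=10240 and
  # names=["<uuid>.disk0_data", "<uuid>.disk0_meta"], the returned disk is an
  # LD_DRBD8 device whose children are a 10240 MB data LV and a 128 MB
  # metadata LV, with logical_id carrying the node pair, port, minors and
  # shared secret.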


def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the entire disk layout for a given template type.

  """
  #TODO: compute space requirements

  vgname = lu.cfg.GetVGName()
  disk_count = len(disk_info)
  disks = []
  if template_name == constants.DT_DISKLESS:
    pass
  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                      for i in range(disk_count)])
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
                              logical_id=(vgname, names[idx]),
                              iv_name="disk/%d" % disk_index,
                              mode=disk["mode"])
      disks.append(disk_dev)
  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    names = []
    for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                               for i in range(disk_count)]):
      names.append(lv_prefix + "_data")
      names.append(lv_prefix + "_meta")
    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk["size"], names[idx*2:idx*2+2],
                                      "disk/%d" % disk_index,
                                      minors[idx*2], minors[idx*2+1])
      disk_dev.mode = disk["mode"]
      disks.append(disk_dev)
  elif template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
                              iv_name="disk/%d" % disk_index,
                              logical_id=(file_driver,
                                          "%s/disk%d" % (file_storage_dir,
                                                         disk_index)),
                              mode=disk["mode"])
      disks.append(disk_dev)
  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
  return disks
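
  # Illustrative call (hypothetical values): with template_name=DT_PLAIN,
  # disk_info=[{"size": 10240, "mode": "rw"}] and base_index=0, this returns a
  # single LD_LV disk named "<uuid>.disk0" in the cluster VG with
  # iv_name="disk/0"; the DRBD8 branch instead allocates two minors and a
  # data/meta LV pair per disk via _GenerateDRBD8Branch.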


def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  """
  return "originstname+%s" % instance.name


def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
  @rtype: boolean
  @return: the success of the creation

  """
  info = _GetInstanceInfoText(instance)
  if target_node is None:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes
  else:
    pnode = target_node
    all_nodes = [pnode]

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)

    result.Raise("Failed to create directory '%s' on"
                 " node %s" % (file_storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue
    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      f_create = node == pnode
      _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)


def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result


def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  """
  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)

  return req_size_dict[disk_template]
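
  # Worked example: two 512 MB disks under DT_DRBD8 need
  # (512 + 128) + (512 + 128) = 1280 MB of volume group space, while the same
  # disks under DT_PLAIN need 1024 MB; diskless and file-based templates
  # return None (no VG space required).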


def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                  hvname,
                                                  hvparams)
  for node in nodenames:
    info = hvinfo[node]
    if info.offline:
      continue
    info.Raise("Hypervisor parameter validation failed on node %s" % node)


class LUCreateInstance(LogicalUnit):
  """Create an instance.

  """
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_REQP = ["instance_name", "disks", "disk_template",
              "mode", "start",
              "wait_for_sync", "ip_check", "nics",
              "hvparams", "beparams"]
  REQ_BGL = False

  def _ExpandNode(self, node):
    """Expands and checks one node name.

    """
    node_full = self.cfg.ExpandNodeName(node)
    if node_full is None:
      raise errors.OpPrereqError("Unknown node %s" % node)
    return node_full

  def ExpandNames(self):
    """ExpandNames for CreateInstance.

    Figure out the right locks for instance creation.

    """
    self.needed_locks = {}

    # set optional parameters to none if they don't exist
    for attr in ["pnode", "snode", "iallocator", "hypervisor"]:
      if not hasattr(self.op, attr):
        setattr(self.op, attr, None)

    # cheap checks, mostly valid constants given

    # verify creation mode
    if self.op.mode not in (constants.INSTANCE_CREATE,
                            constants.INSTANCE_IMPORT):
      raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
                                 self.op.mode)

    # disk template and mirror node verification
    if self.op.disk_template not in constants.DISK_TEMPLATES:
      raise errors.OpPrereqError("Invalid disk template name")

    if self.op.hypervisor is None:
      self.op.hypervisor = self.cfg.GetHypervisorType()

    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors
    if self.op.hypervisor not in enabled_hvs:
      raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
                                 " cluster (%s)" % (self.op.hypervisor,
                                  ",".join(enabled_hvs)))

    # check hypervisor parameter syntax (locally)
    utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
    filled_hvp = objects.FillDict(cluster.hvparams[self.op.hypervisor],
                                  self.op.hvparams)
    hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
    hv_type.CheckParameterSyntax(filled_hvp)
    self.hv_full = filled_hvp

    # fill and remember the beparams dict
    utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
    self.be_full = objects.FillDict(cluster.beparams[constants.PP_DEFAULT],
                                    self.op.beparams)

    #### instance parameters check

    # instance name verification
    hostname1 = utils.HostInfo(self.op.instance_name)
    self.op.instance_name = instance_name = hostname1.name

    # this is just a preventive check, but someone might still add this
    # instance in the meantime, and creation will fail at lock-add time
    if instance_name in self.cfg.GetInstanceList():
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 instance_name)

    self.add_locks[locking.LEVEL_INSTANCE] = instance_name

    # NIC buildup
    self.nics = []
    for idx, nic in enumerate(self.op.nics):
      nic_mode_req = nic.get("mode", None)
      nic_mode = nic_mode_req
      if nic_mode is None:
        nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]

      # in routed mode, for the first nic, the default ip is 'auto'
      if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
        default_ip_mode = constants.VALUE_AUTO
      else:
        default_ip_mode = constants.VALUE_NONE

      # ip validity checks
      ip = nic.get("ip", default_ip_mode)
      if ip is None or ip.lower() == constants.VALUE_NONE:
        nic_ip = None
      elif ip.lower() == constants.VALUE_AUTO:
        nic_ip = hostname1.ip
      else:
        if not utils.IsValidIP(ip):
          raise errors.OpPrereqError("Given IP address '%s' doesn't look"
                                     " like a valid IP" % ip)
        nic_ip = ip

      # TODO: check the ip for uniqueness !!
      if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
        raise errors.OpPrereqError("Routed nic mode requires an ip address")

      # MAC address verification
      mac = nic.get("mac", constants.VALUE_AUTO)
      if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
        if not utils.IsValidMac(mac.lower()):
          raise errors.OpPrereqError("Invalid MAC address specified: %s" %
                                     mac)
      # bridge verification
      bridge = nic.get("bridge", None)
      link = nic.get("link", None)
      if bridge and link:
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
                                   " at the same time")
      elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
        raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic")
      elif bridge:
        link = bridge

      nicparams = {}
      if nic_mode_req:
        nicparams[constants.NIC_MODE] = nic_mode_req
      if link:
        nicparams[constants.NIC_LINK] = link

      check_params = objects.FillDict(cluster.nicparams[constants.PP_DEFAULT],
                                      nicparams)
      objects.NIC.CheckParameterSyntax(check_params)
      self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))

    # disk checks/pre-build
    self.disks = []
    for disk in self.op.disks:
      mode = disk.get("mode", constants.DISK_RDWR)
      if mode not in constants.DISK_ACCESS_SET:
        raise errors.OpPrereqError("Invalid disk access mode '%s'" %
                                   mode)
      size = disk.get("size", None)
      if size is None:
        raise errors.OpPrereqError("Missing disk size")
      try:
        size = int(size)
      except ValueError:
        raise errors.OpPrereqError("Invalid disk size '%s'" % size)
      self.disks.append({"size": size, "mode": mode})
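
    # Illustrative input shapes accepted above (hypothetical values):
    #   self.op.nics  = [{"mode": "bridged", "link": "xen-br0",
    #                     "mac": "auto", "ip": None}]
    #   self.op.disks = [{"size": 10240, "mode": "rw"}]
    # which become objects.NIC instances in self.nics and plain dicts with
    # validated "size"/"mode" keys in self.disks.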

    # used in CheckPrereq for ip ping check
    self.check_ip = hostname1.ip

    # file storage checks
    if (self.op.file_driver and
        self.op.file_driver not in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver)

    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute")

    ### Node/iallocator related checks
    if [self.op.iallocator, self.op.pnode].count(None) != 1:
      raise errors.OpPrereqError("One and only one of iallocator and primary"
                                 " node must be given")

    if self.op.iallocator:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.pnode = self._ExpandNode(self.op.pnode)
      nodelist = [self.op.pnode]
      if self.op.snode is not None:
        self.op.snode = self._ExpandNode(self.op.snode)
        nodelist.append(self.op.snode)
      self.needed_locks[locking.LEVEL_NODE] = nodelist

    # in case of import lock the source node too
    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = getattr(self.op, "src_node", None)
      src_path = getattr(self.op, "src_path", None)

      if src_path is None:
        self.op.src_path = src_path = self.op.instance_name

      if src_node is None:
        self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
        self.op.src_node = None
        if os.path.isabs(src_path):
          raise errors.OpPrereqError("Importing an instance from an absolute"
                                     " path requires a source node option.")
      else:
        self.op.src_node = src_node = self._ExpandNode(src_node)
        if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
          self.needed_locks[locking.LEVEL_NODE].append(src_node)
        if not os.path.isabs(src_path):
          self.op.src_path = src_path = \
            os.path.join(constants.EXPORT_DIR, src_path)

    else: # INSTANCE_CREATE
      if getattr(self.op, "os_type", None) is None:
        raise errors.OpPrereqError("No guest OS specified")

  def _RunAllocator(self):
    """Run the allocator based on input opcode.

    """
    nics = [n.ToDict() for n in self.nics]
    ial = IAllocator(self.cfg, self.rpc,
                     mode=constants.IALLOCATOR_MODE_ALLOC,
                     name=self.op.instance_name,
                     disk_template=self.op.disk_template,
                     tags=[],
                     os=self.op.os_type,
                     vcpus=self.be_full[constants.BE_VCPUS],
                     mem_size=self.be_full[constants.BE_MEMORY],
                     disks=self.disks,
                     nics=nics,
                     hypervisor=self.op.hypervisor,
                     )

    ial.Run(self.op.iallocator)

    if not ial.success:
      raise errors.OpPrereqError("Can't compute nodes using"
                                 " iallocator '%s': %s" % (self.op.iallocator,
                                                           ial.info))
    if len(ial.nodes) != ial.required_nodes:
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
                                 " of nodes (%s), required %s" %
                                 (self.op.iallocator, len(ial.nodes),
                                  ial.required_nodes))
    self.op.pnode = ial.nodes[0]
    self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
                 self.op.instance_name, self.op.iallocator,
                 ", ".join(ial.nodes))
    if ial.required_nodes == 2:
      self.op.snode = ial.nodes[1]

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "ADD_MODE": self.op.mode,
      }
    if self.op.mode == constants.INSTANCE_IMPORT:
      env["SRC_NODE"] = self.op.src_node
      env["SRC_PATH"] = self.op.src_path
      env["SRC_IMAGES"] = self.src_images

    env.update(_BuildInstanceHookEnv(
      name=self.op.instance_name,
      primary_node=self.op.pnode,
      secondary_nodes=self.secondaries,
      status=self.op.start,
      os_type=self.op.os_type,
      memory=self.be_full[constants.BE_MEMORY],
      vcpus=self.be_full[constants.BE_VCPUS],
      nics=_NICListToTuple(self, self.nics),
      disk_template=self.op.disk_template,
      disks=[(d["size"], d["mode"]) for d in self.disks],
      bep=self.be_full,
      hvp=self.hv_full,
      hypervisor_name=self.op.hypervisor,
    ))

    nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
          self.secondaries)
    return env, nl, nl


  def CheckPrereq(self):
    """Check prerequisites.

    """
    if (not self.cfg.GetVGName() and
        self.op.disk_template not in constants.DTS_NOT_LVM):
      raise errors.OpPrereqError("Cluster does not support lvm-based"
                                 " instances")

    if self.op.mode == constants.INSTANCE_IMPORT:
      src_node = self.op.src_node
      src_path = self.op.src_path

      if src_node is None:
        locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
        exp_list = self.rpc.call_export_list(locked_nodes)
        found = False
        for node in exp_list:
          if exp_list[node].fail_msg:
            continue
          if src_path in exp_list[node].payload:
            found = True
            self.op.src_node = src_node = node
            self.op.src_path = src_path = os.path.join(constants.EXPORT_DIR,
                                                       src_path)
            break
        if not found:
          raise errors.OpPrereqError("No export found for relative path %s" %
                                      src_path)

      _CheckNodeOnline(self, src_node)
      result = self.rpc.call_export_info(src_node, src_path)
      result.Raise("No export or invalid export found in dir %s" % src_path)

      export_info = objects.SerializableConfigParser.Loads(str(result.payload))
      if not export_info.has_section(constants.INISECT_EXP):
        raise errors.ProgrammerError("Corrupted export config")

      ei_version = export_info.get(constants.INISECT_EXP, 'version')
      if (int(ei_version) != constants.EXPORT_VERSION):
        raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
                                   (ei_version, constants.EXPORT_VERSION))

      # Check that the new instance doesn't have less disks than the export
      instance_disks = len(self.disks)
      export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
      if instance_disks < export_disks:
        raise errors.OpPrereqError("Not enough disks to import."
                                   " (instance: %d, export: %d)" %
                                   (instance_disks, export_disks))

      self.op.os_type = export_info.get(constants.INISECT_EXP, 'os')
      disk_images = []
      for idx in range(export_disks):
        option = 'disk%d_dump' % idx
        if export_info.has_option(constants.INISECT_INS, option):
          # FIXME: are the old os-es, disk sizes, etc. useful?
          export_name = export_info.get(constants.INISECT_INS, option)
          image = os.path.join(src_path, export_name)
          disk_images.append(image)
        else:
          disk_images.append(False)

      self.src_images = disk_images

      old_name = export_info.get(constants.INISECT_INS, 'name')
      # FIXME: int() here could throw a ValueError on broken exports
      exp_nic_count = int(export_info.get(constants.INISECT_INS, 'nic_count'))
      if self.op.instance_name == old_name:
        for idx, nic in enumerate(self.nics):
          if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
            nic_mac_ini = 'nic%d_mac' % idx
            nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
5634

    
5635
    # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
5636
    # ip ping checks (we use the same ip that was resolved in ExpandNames)
5637
    if self.op.start and not self.op.ip_check:
5638
      raise errors.OpPrereqError("Cannot ignore IP address conflicts when"
5639
                                 " adding an instance in start mode")
5640

    
5641
    if self.op.ip_check:
5642
      if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
5643
        raise errors.OpPrereqError("IP %s of instance %s already in use" %
5644
                                   (self.check_ip, self.op.instance_name))
5645

    
5646
    #### mac address generation
5647
    # By generating here the mac address both the allocator and the hooks get
5648
    # the real final mac address rather than the 'auto' or 'generate' value.
5649
    # There is a race condition between the generation and the instance object
5650
    # creation, which means that we know the mac is valid now, but we're not
5651
    # sure it will be when we actually add the instance. If things go bad
5652
    # adding the instance will abort because of a duplicate mac, and the
5653
    # creation job will fail.
5654
    for nic in self.nics:
5655
      if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
5656
        nic.mac = self.cfg.GenerateMAC()
5657

    
5658
    #### allocator run
5659

    
5660
    if self.op.iallocator is not None:
5661
      self._RunAllocator()
5662

    
5663
    #### node related checks
5664

    
5665
    # check primary node
5666
    self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
5667
    assert self.pnode is not None, \
5668
      "Cannot retrieve locked node %s" % self.op.pnode
5669
    if pnode.offline:
5670
      raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
5671
                                 pnode.name)
5672
    if pnode.drained:
5673
      raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
5674
                                 pnode.name)
5675

    
5676
    self.secondaries = []
5677

    
5678
    # mirror node verification
5679
    if self.op.disk_template in constants.DTS_NET_MIRROR:
5680
      if self.op.snode is None:
5681
        raise errors.OpPrereqError("The networked disk templates need"
5682
                                   " a mirror node")
5683
      if self.op.snode == pnode.name:
5684
        raise errors.OpPrereqError("The secondary node cannot be"
5685
                                   " the primary node.")
5686
      _CheckNodeOnline(self, self.op.snode)
5687
      _CheckNodeNotDrained(self, self.op.snode)
5688
      self.secondaries.append(self.op.snode)
5689

    
5690
    nodenames = [pnode.name] + self.secondaries
5691

    
5692
    req_size = _ComputeDiskSize(self.op.disk_template,
5693
                                self.disks)
5694

    
5695
    # Check lv size requirements
5696
    if req_size is not None:
5697
      nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
5698
                                         self.op.hypervisor)
5699
      for node in nodenames:
5700
        info = nodeinfo[node]
5701
        info.Raise("Cannot get current information from node %s" % node)
5702
        info = info.payload
5703
        vg_free = info.get('vg_free', None)
5704
        if not isinstance(vg_free, int):
5705
          raise errors.OpPrereqError("Can't compute free disk space on"
5706
                                     " node %s" % node)
5707
        if req_size > vg_free:
5708
          raise errors.OpPrereqError("Not enough disk space on target node %s."
5709
                                     " %d MB available, %d MB required" %
5710
                                     (node, vg_free, req_size))
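    # req_size is the amount of new LV space (in MB) needed on *each* of the
    # nodes above; for mirrored templates that covers primary and secondary.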
5711

    
5712
    _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
5713

    
5714
    # os verification
5715
    result = self.rpc.call_os_get(pnode.name, self.op.os_type)
5716
    result.Raise("OS '%s' not in supported os list for primary node %s" %
5717
                 (self.op.os_type, pnode.name), prereq=True)
5718

    
5719
    _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
5720

    
5721
    # memory check on primary node
5722
    if self.op.start:
5723
      _CheckNodeFreeMemory(self, self.pnode.name,
5724
                           "creating instance %s" % self.op.instance_name,
5725
                           self.be_full[constants.BE_MEMORY],
5726
                           self.op.hypervisor)
5727

    
5728
    self.dry_run_result = list(nodenames)
5729

    
5730
  def Exec(self, feedback_fn):
5731
    """Create and add the instance to the cluster.
5732

5733
    """
5734
    instance = self.op.instance_name
5735
    pnode_name = self.pnode.name
5736

    
5737
    ht_kind = self.op.hypervisor
5738
    if ht_kind in constants.HTS_REQ_PORT:
5739
      network_port = self.cfg.AllocatePort()
5740
    else:
5741
      network_port = None
5742

    
5743
    ##if self.op.vnc_bind_address is None:
5744
    ##  self.op.vnc_bind_address = constants.VNC_DEFAULT_BIND_ADDRESS
5745

    
5746
    # this is needed because os.path.join does not accept None arguments
5747
    if self.op.file_storage_dir is None:
5748
      string_file_storage_dir = ""
5749
    else:
5750
      string_file_storage_dir = self.op.file_storage_dir
5751

    
5752
    # build the full file storage dir path
5753
    file_storage_dir = os.path.normpath(os.path.join(
5754
                                        self.cfg.GetFileStorageDir(),
5755
                                        string_file_storage_dir, instance))
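    # e.g. with a cluster file storage dir of /srv/ganeti/file-storage this
    # becomes /srv/ganeti/file-storage/<file_storage_dir>/<instance name>
    # (path shown for illustration only)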
5756

    
5757

    
5758
    disks = _GenerateDiskTemplate(self,
5759
                                  self.op.disk_template,
5760
                                  instance, pnode_name,
5761
                                  self.secondaries,
5762
                                  self.disks,
5763
                                  file_storage_dir,
5764
                                  self.op.file_driver,
5765
                                  0)
5766

    
5767
    iobj = objects.Instance(name=instance, os=self.op.os_type,
5768
                            primary_node=pnode_name,
5769
                            nics=self.nics, disks=disks,
5770
                            disk_template=self.op.disk_template,
5771
                            admin_up=False,
5772
                            network_port=network_port,
5773
                            beparams=self.op.beparams,
5774
                            hvparams=self.op.hvparams,
5775
                            hypervisor=self.op.hypervisor,
5776
                            )
5777

    
5778
    feedback_fn("* creating instance disks...")
5779
    try:
5780
      _CreateDisks(self, iobj)
5781
    except errors.OpExecError:
5782
      self.LogWarning("Device creation failed, reverting...")
5783
      try:
5784
        _RemoveDisks(self, iobj)
5785
      finally:
5786
        self.cfg.ReleaseDRBDMinors(instance)
5787
        raise
5788

    
5789
    feedback_fn("adding instance %s to cluster config" % instance)
5790

    
5791
    self.cfg.AddInstance(iobj)
5792
    # Declare that we don't want to remove the instance lock anymore, as we've
5793
    # added the instance to the config
5794
    del self.remove_locks[locking.LEVEL_INSTANCE]
5795
    # Unlock all the nodes
5796
    if self.op.mode == constants.INSTANCE_IMPORT:
5797
      nodes_keep = [self.op.src_node]
5798
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
5799
                       if node != self.op.src_node]
5800
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
5801
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
5802
    else:
5803
      self.context.glm.release(locking.LEVEL_NODE)
5804
      del self.acquired_locks[locking.LEVEL_NODE]
5805

    
5806
    if self.op.wait_for_sync:
5807
      disk_abort = not _WaitForSync(self, iobj)
5808
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
5809
      # make sure the disks are not degraded (still sync-ing is ok)
5810
      time.sleep(15)
5811
      feedback_fn("* checking mirrors status")
5812
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
5813
    else:
5814
      disk_abort = False
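    # Three cases: explicit wait for a full sync, a one-shot degradation check
    # for mirrored templates (after a short grace period), or no check at all.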
5815

    
5816
    if disk_abort:
5817
      _RemoveDisks(self, iobj)
5818
      self.cfg.RemoveInstance(iobj.name)
5819
      # Make sure the instance lock gets removed
5820
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
5821
      raise errors.OpExecError("There are some degraded disks for"
5822
                               " this instance")
5823

    
5824
    feedback_fn("creating os for instance %s on node %s" %
5825
                (instance, pnode_name))
5826

    
5827
    if iobj.disk_template != constants.DT_DISKLESS:
5828
      if self.op.mode == constants.INSTANCE_CREATE:
5829
        feedback_fn("* running the instance OS create scripts...")
5830
        result = self.rpc.call_instance_os_add(pnode_name, iobj, False)
5831
        result.Raise("Could not add os for instance %s"
5832
                     " on node %s" % (instance, pnode_name))
5833

    
5834
      elif self.op.mode == constants.INSTANCE_IMPORT:
5835
        feedback_fn("* running the instance OS import scripts...")
5836
        src_node = self.op.src_node
5837
        src_images = self.src_images
5838
        cluster_name = self.cfg.GetClusterName()
5839
        import_result = self.rpc.call_instance_os_import(pnode_name, iobj,
5840
                                                         src_node, src_images,
5841
                                                         cluster_name)
5842
        msg = import_result.fail_msg
5843
        if msg:
5844
          self.LogWarning("Error while importing the disk images for instance"
5845
                          " %s on node %s: %s" % (instance, pnode_name, msg))
5846
      else:
5847
        # also checked in the prereq part
5848
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
5849
                                     % self.op.mode)
5850

    
5851
    if self.op.start:
5852
      iobj.admin_up = True
5853
      self.cfg.Update(iobj)
5854
      logging.info("Starting instance %s on node %s", instance, pnode_name)
5855
      feedback_fn("* starting instance...")
5856
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
5857
      result.Raise("Could not start instance")
5858

    
5859
    return list(iobj.all_nodes)
5860

    
5861

    
5862
class LUConnectConsole(NoHooksLU):
5863
  """Connect to an instance's console.
5864

5865
  This is somewhat special in that it returns the command line that
5866
  you need to run on the master node in order to connect to the
5867
  console.
5868

5869
  """
5870
  _OP_REQP = ["instance_name"]
5871
  REQ_BGL = False
5872

    
5873
  def ExpandNames(self):
5874
    self._ExpandAndLockInstance()
5875

    
5876
  def CheckPrereq(self):
5877
    """Check prerequisites.
5878

5879
    This checks that the instance is in the cluster.
5880

5881
    """
5882
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5883
    assert self.instance is not None, \
5884
      "Cannot retrieve locked instance %s" % self.op.instance_name
5885
    _CheckNodeOnline(self, self.instance.primary_node)
5886

    
5887
  def Exec(self, feedback_fn):
5888
    """Connect to the console of an instance
5889

5890
    """
5891
    instance = self.instance
5892
    node = instance.primary_node
5893

    
5894
    node_insts = self.rpc.call_instance_list([node],
5895
                                             [instance.hypervisor])[node]
5896
    node_insts.Raise("Can't get node information from %s" % node)
5897

    
5898
    if instance.name not in node_insts.payload:
5899
      raise errors.OpExecError("Instance %s is not running." % instance.name)
5900

    
5901
    logging.debug("Connecting to console of %s on %s", instance.name, node)
5902

    
5903
    hyper = hypervisor.GetHypervisor(instance.hypervisor)
5904
    cluster = self.cfg.GetClusterInfo()
5905
    # beparams and hvparams are passed separately, to avoid editing the
5906
    # instance and then saving the defaults in the instance itself.
5907
    hvparams = cluster.FillHV(instance)
5908
    beparams = cluster.FillBE(instance)
5909
    console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
5910

    
5911
    # build ssh cmdline
5912
    return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
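    # The returned value is the SSH argument vector to execute on the master,
    # roughly "ssh -t root@<primary node> '<hypervisor console command>'"
    # (illustrative; the exact options are built by ssh.BuildCmd).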
5913

    
5914

    
5915
class LUReplaceDisks(LogicalUnit):
5916
  """Replace the disks of an instance.
5917

5918
  """
5919
  HPATH = "mirrors-replace"
5920
  HTYPE = constants.HTYPE_INSTANCE
5921
  _OP_REQP = ["instance_name", "mode", "disks"]
5922
  REQ_BGL = False
5923

    
5924
  def CheckArguments(self):
5925
    if not hasattr(self.op, "remote_node"):
5926
      self.op.remote_node = None
5927
    if not hasattr(self.op, "iallocator"):
5928
      self.op.iallocator = None
5929

    
5930
    TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
5931
                                  self.op.iallocator)
5932

    
5933
  def ExpandNames(self):
5934
    self._ExpandAndLockInstance()
5935

    
5936
    if self.op.iallocator is not None:
5937
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
5938

    
5939
    elif self.op.remote_node is not None:
5940
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
5941
      if remote_node is None:
5942
        raise errors.OpPrereqError("Node '%s' not known" %
5943
                                   self.op.remote_node)
5944

    
5945
      self.op.remote_node = remote_node
5946

    
5947
      # Warning: do not remove the locking of the new secondary here
5948
      # unless DRBD8.AddChildren is changed to work in parallel;
5949
      # currently it doesn't since parallel invocations of
5950
      # FindUnusedMinor will conflict
5951
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
5952
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5953

    
5954
    else:
5955
      self.needed_locks[locking.LEVEL_NODE] = []
5956
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5957

    
5958
    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
5959
                                   self.op.iallocator, self.op.remote_node,
5960
                                   self.op.disks)
5961

    
5962
    self.tasklets = [self.replacer]
5963

    
5964
  def DeclareLocks(self, level):
5965
    # If we're not already locking all nodes in the set we have to declare the
5966
    # instance's primary/secondary nodes.
5967
    if (level == locking.LEVEL_NODE and
5968
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
5969
      self._LockInstancesNodes()
5970

    
5971
  def BuildHooksEnv(self):
5972
    """Build hooks env.
5973

5974
    This runs on the master, the primary and all the secondaries.
5975

5976
    """
5977
    instance = self.replacer.instance
5978
    env = {
5979
      "MODE": self.op.mode,
5980
      "NEW_SECONDARY": self.op.remote_node,
5981
      "OLD_SECONDARY": instance.secondary_nodes[0],
5982
      }
5983
    env.update(_BuildInstanceHookEnvByObject(self, instance))
5984
    nl = [
5985
      self.cfg.GetMasterNode(),
5986
      instance.primary_node,
5987
      ]
5988
    if self.op.remote_node is not None:
5989
      nl.append(self.op.remote_node)
5990
    return env, nl, nl
5991

    
5992

    
5993
class LUEvacuateNode(LogicalUnit):
5994
  """Relocate the secondary instances from a node.
5995

5996
  """
5997
  HPATH = "node-evacuate"
5998
  HTYPE = constants.HTYPE_NODE
5999
  _OP_REQP = ["node_name"]
6000
  REQ_BGL = False
6001

    
6002
  def CheckArguments(self):
6003
    if not hasattr(self.op, "remote_node"):
6004
      self.op.remote_node = None
6005
    if not hasattr(self.op, "iallocator"):
6006
      self.op.iallocator = None
6007

    
6008
    TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
6009
                                  self.op.remote_node,
6010
                                  self.op.iallocator)
6011

    
6012
  def ExpandNames(self):
6013
    self.op.node_name = self.cfg.ExpandNodeName(self.op.node_name)
6014
    if self.op.node_name is None:
6015
      raise errors.OpPrereqError("Node '%s' not known" % self.op.node_name)
6016

    
6017
    self.needed_locks = {}
6018

    
6019
    # Declare node locks
6020
    if self.op.iallocator is not None:
6021
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6022

    
6023
    elif self.op.remote_node is not None:
6024
      remote_node = self.cfg.ExpandNodeName(self.op.remote_node)
6025
      if remote_node is None:
6026
        raise errors.OpPrereqError("Node '%s' not known" %
6027
                                   self.op.remote_node)
6028

    
6029
      self.op.remote_node = remote_node
6030

    
6031
      # Warning: do not remove the locking of the new secondary here
6032
      # unless DRBD8.AddChildren is changed to work in parallel;
6033
      # currently it doesn't since parallel invocations of
6034
      # FindUnusedMinor will conflict
6035
      self.needed_locks[locking.LEVEL_NODE] = [remote_node]
6036
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6037

    
6038
    else:
6039
      raise errors.OpPrereqError("Invalid parameters")
6040

    
6041
    # Create tasklets for replacing disks for all secondary instances on this
6042
    # node
6043
    names = []
6044
    tasklets = []
6045

    
6046
    for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
6047
      logging.debug("Replacing disks for instance %s", inst.name)
6048
      names.append(inst.name)
6049

    
6050
      replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
6051
                                self.op.iallocator, self.op.remote_node, [])
6052
      tasklets.append(replacer)
6053

    
6054
    self.tasklets = tasklets
6055
    self.instance_names = names
6056

    
6057
    # Declare instance locks
6058
    self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
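    # One TLReplaceDisks tasklet (secondary change) is queued per secondary
    # instance, so a single opcode moves all secondary replicas off this node.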
6059

    
6060
  def DeclareLocks(self, level):
6061
    # If we're not already locking all nodes in the set we have to declare the
6062
    # instance's primary/secondary nodes.
6063
    if (level == locking.LEVEL_NODE and
6064
        self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
6065
      self._LockInstancesNodes()
6066

    
6067
  def BuildHooksEnv(self):
6068
    """Build hooks env.
6069

6070
    This runs on the master, the primary and all the secondaries.
6071

6072
    """
6073
    env = {
6074
      "NODE_NAME": self.op.node_name,
6075
      }
6076

    
6077
    nl = [self.cfg.GetMasterNode()]
6078

    
6079
    if self.op.remote_node is not None:
6080
      env["NEW_SECONDARY"] = self.op.remote_node
6081
      nl.append(self.op.remote_node)
6082

    
6083
    return (env, nl, nl)
6084

    
6085

    
6086
class TLReplaceDisks(Tasklet):
6087
  """Replaces disks for an instance.
6088

6089
  Note: Locking is not within the scope of this class.
6090

6091
  """
6092
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
6093
               disks):
6094
    """Initializes this class.
6095

6096
    """
6097
    Tasklet.__init__(self, lu)
6098

    
6099
    # Parameters
6100
    self.instance_name = instance_name
6101
    self.mode = mode
6102
    self.iallocator_name = iallocator_name
6103
    self.remote_node = remote_node
6104
    self.disks = disks
6105

    
6106
    # Runtime data
6107
    self.instance = None
6108
    self.new_node = None
6109
    self.target_node = None
6110
    self.other_node = None
6111
    self.remote_node_info = None
6112
    self.node_secondary_ip = None
6113

    
6114
  @staticmethod
6115
  def CheckArguments(mode, remote_node, iallocator):
6116
    """Helper function for users of this class.
6117

6118
    """
6119
    # check for valid parameter combination
6120
    if mode == constants.REPLACE_DISK_CHG:
6121
      if remote_node is None and iallocator is None:
6122
        raise errors.OpPrereqError("When changing the secondary either an"
6123
                                   " iallocator script must be used or the"
6124
                                   " new node given")
6125

    
6126
      if remote_node is not None and iallocator is not None:
6127
        raise errors.OpPrereqError("Give either the iallocator or the new"
6128
                                   " secondary, not both")
6129

    
6130
    elif remote_node is not None or iallocator is not None:
6131
      # Not replacing the secondary
6132
      raise errors.OpPrereqError("The iallocator and new node options can"
6133
                                 " only be used when changing the"
6134
                                 " secondary node")
6135

    
6136
  @staticmethod
6137
  def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
6138
    """Compute a new secondary node using an IAllocator.
6139

6140
    """
6141
    ial = IAllocator(lu.cfg, lu.rpc,
6142
                     mode=constants.IALLOCATOR_MODE_RELOC,
6143
                     name=instance_name,
6144
                     relocate_from=relocate_from)
6145

    
6146
    ial.Run(iallocator_name)
6147

    
6148
    if not ial.success:
6149
      raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
6150
                                 " %s" % (iallocator_name, ial.info))
6151

    
6152
    if len(ial.nodes) != ial.required_nodes:
6153
      raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6154
                                 " of nodes (%s), required %s" %
6155
                                 (iallocator_name, len(ial.nodes),
                                  ial.required_nodes))
6156

    
6157
    remote_node_name = ial.nodes[0]
6158

    
6159
    lu.LogInfo("Selected new secondary for instance '%s': %s",
6160
               instance_name, remote_node_name)
6161

    
6162
    return remote_node_name
6163

    
6164
  def _FindFaultyDisks(self, node_name):
6165
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
6166
                                    node_name, True)
6167

    
6168
  def CheckPrereq(self):
6169
    """Check prerequisites.
6170

6171
    This checks that the instance is in the cluster.
6172

6173
    """
6174
    self.instance = self.cfg.GetInstanceInfo(self.instance_name)
6175
    assert self.instance is not None, \
6176
      "Cannot retrieve locked instance %s" % self.instance_name
6177

    
6178
    if self.instance.disk_template != constants.DT_DRBD8:
6179
      raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
6180
                                 " instances")
6181

    
6182
    if len(self.instance.secondary_nodes) != 1:
6183
      raise errors.OpPrereqError("The instance has a strange layout,"
6184
                                 " expected one secondary but found %d" %
6185
                                 len(self.instance.secondary_nodes))
6186

    
6187
    secondary_node = self.instance.secondary_nodes[0]
6188

    
6189
    if self.iallocator_name is None:
6190
      remote_node = self.remote_node
6191
    else:
6192
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
6193
                                       self.instance.name, secondary_node)
6194

    
6195
    if remote_node is not None:
6196
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
6197
      assert self.remote_node_info is not None, \
6198
        "Cannot retrieve locked node %s" % remote_node
6199
    else:
6200
      self.remote_node_info = None
6201

    
6202
    if remote_node == self.instance.primary_node:
6203
      raise errors.OpPrereqError("The specified node is the primary node of"
6204
                                 " the instance.")
6205

    
6206
    if remote_node == secondary_node:
6207
      raise errors.OpPrereqError("The specified node is already the"
6208
                                 " secondary node of the instance.")
6209

    
6210
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
6211
                                    constants.REPLACE_DISK_CHG):
6212
      raise errors.OpPrereqError("Cannot specify disks to be replaced")
6213

    
6214
    if self.mode == constants.REPLACE_DISK_AUTO:
6215
      faulty_primary = self._FindFaultyDisks(self.instance.primary_node)
6216
      faulty_secondary = self._FindFaultyDisks(secondary_node)
6217

    
6218
      if faulty_primary and faulty_secondary:
6219
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
6220
                                   " one node and can not be repaired"
6221
                                   " automatically" % self.instance_name)
6222

    
6223
      if faulty_primary:
6224
        self.disks = faulty_primary
6225
        self.target_node = self.instance.primary_node
6226
        self.other_node = secondary_node
6227
        check_nodes = [self.target_node, self.other_node]
6228
      elif faulty_secondary:
6229
        self.disks = faulty_secondary
6230
        self.target_node = secondary_node
6231
        self.other_node = self.instance.primary_node
6232
        check_nodes = [self.target_node, self.other_node]
6233
      else:
6234
        self.disks = []
6235
        check_nodes = []
6236

    
6237
    else:
6238
      # Non-automatic modes
6239
      if self.mode == constants.REPLACE_DISK_PRI:
6240
        self.target_node = self.instance.primary_node
6241
        self.other_node = secondary_node
6242
        check_nodes = [self.target_node, self.other_node]
6243

    
6244
      elif self.mode == constants.REPLACE_DISK_SEC:
6245
        self.target_node = secondary_node
6246
        self.other_node = self.instance.primary_node
6247
        check_nodes = [self.target_node, self.other_node]
6248

    
6249
      elif self.mode == constants.REPLACE_DISK_CHG:
6250
        self.new_node = remote_node
6251
        self.other_node = self.instance.primary_node
6252
        self.target_node = secondary_node
6253
        check_nodes = [self.new_node, self.other_node]
6254

    
6255
        _CheckNodeNotDrained(self.lu, remote_node)
6256

    
6257
      else:
6258
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
6259
                                     self.mode)
6260

    
6261
      # If not specified all disks should be replaced
6262
      if not self.disks:
6263
        self.disks = range(len(self.instance.disks))
6264

    
6265
    for node in check_nodes:
6266
      _CheckNodeOnline(self.lu, node)
6267

    
6268
    # Check whether disks are valid
6269
    for disk_idx in self.disks:
6270
      self.instance.FindDisk(disk_idx)
6271

    
6272
    # Get secondary node IP addresses
6273
    node_2nd_ip = {}
6274

    
6275
    for node_name in [self.target_node, self.other_node, self.new_node]:
6276
      if node_name is not None:
6277
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
6278

    
6279
    self.node_secondary_ip = node_2nd_ip
6280

    
6281
  def Exec(self, feedback_fn):
6282
    """Execute disk replacement.
6283

6284
    This dispatches the disk replacement to the appropriate handler.
6285

6286
    """
6287
    if not self.disks:
6288
      feedback_fn("No disks need replacement")
6289
      return
6290

    
6291
    feedback_fn("Replacing disk(s) %s for %s" %
6292
                (", ".join([str(i) for i in self.disks]), self.instance.name))
6293

    
6294
    activate_disks = (not self.instance.admin_up)
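    # If the instance is down its disks are normally inactive; activate them
    # for the duration of the replacement and shut them down again afterwards.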
6295

    
6296
    # Activate the instance disks if we're replacing them on a down instance
6297
    if activate_disks:
6298
      _StartInstanceDisks(self.lu, self.instance, True)
6299

    
6300
    try:
6301
      # Should we replace the secondary node?
6302
      if self.new_node is not None:
6303
        return self._ExecDrbd8Secondary()
6304
      else:
6305
        return self._ExecDrbd8DiskOnly()
6306

    
6307
    finally:
6308
      # Deactivate the instance disks if we're replacing them on a down instance
6309
      if activate_disks:
6310
        _SafeShutdownInstanceDisks(self.lu, self.instance)
6311

    
6312
  def _CheckVolumeGroup(self, nodes):
6313
    self.lu.LogInfo("Checking volume groups")
6314

    
6315
    vgname = self.cfg.GetVGName()
6316

    
6317
    # Make sure volume group exists on all involved nodes
6318
    results = self.rpc.call_vg_list(nodes)
6319
    if not results:
6320
      raise errors.OpExecError("Can't list volume groups on the nodes")
6321

    
6322
    for node in nodes:
6323
      res = results[node]
6324
      res.Raise("Error checking node %s" % node)
6325
      if vgname not in res.payload:
6326
        raise errors.OpExecError("Volume group '%s' not found on node %s" %
6327
                                 (vgname, node))
6328

    
6329
  def _CheckDisksExistence(self, nodes):
6330
    # Check disk existence
6331
    for idx, dev in enumerate(self.instance.disks):
6332
      if idx not in self.disks:
6333
        continue
6334

    
6335
      for node in nodes:
6336
        self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
6337
        self.cfg.SetDiskID(dev, node)
6338

    
6339
        result = self.rpc.call_blockdev_find(node, dev)
6340

    
6341
        msg = result.fail_msg
6342
        if msg or not result.payload:
6343
          if not msg:
6344
            msg = "disk not found"
6345
          raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
6346
                                   (idx, node, msg))
6347

    
6348
  def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
6349
    for idx, dev in enumerate(self.instance.disks):
6350
      if idx not in self.disks:
6351
        continue
6352

    
6353
      self.lu.LogInfo("Checking disk/%d consistency on node %s" %
6354
                      (idx, node_name))
6355

    
6356
      if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
6357
                                   ldisk=ldisk):
6358
        raise errors.OpExecError("Node %s has degraded storage, unsafe to"
6359
                                 " replace disks for instance %s" %
6360
                                 (node_name, self.instance.name))
6361

    
6362
  def _CreateNewStorage(self, node_name):
6363
    vgname = self.cfg.GetVGName()
6364
    iv_names = {}
6365

    
6366
    for idx, dev in enumerate(self.instance.disks):
6367
      if idx not in self.disks:
6368
        continue
6369

    
6370
      self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
6371

    
6372
      self.cfg.SetDiskID(dev, node_name)
6373

    
6374
      lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
6375
      names = _GenerateUniqueNames(self.lu, lv_names)
6376

    
6377
      lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
6378
                             logical_id=(vgname, names[0]))
6379
      lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6380
                             logical_id=(vgname, names[1]))
6381

    
6382
      new_lvs = [lv_data, lv_meta]
6383
      old_lvs = dev.children
6384
      iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
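      # remember (drbd device, LVs currently attached, freshly created LVs)
      # so the detach/rename/attach and cleanup steps can find them later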
6385

    
6386
      # we pass force_create=True to force the LVM creation
6387
      for new_lv in new_lvs:
6388
        _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
6389
                        _GetInstanceInfoText(self.instance), False)
6390

    
6391
    return iv_names
6392

    
6393
  def _CheckDevices(self, node_name, iv_names):
6394
    for name, (dev, old_lvs, new_lvs) in iv_names.iteritems():
6395
      self.cfg.SetDiskID(dev, node_name)
6396

    
6397
      result = self.rpc.call_blockdev_find(node_name, dev)
6398

    
6399
      msg = result.fail_msg
6400
      if msg or not result.payload:
6401
        if not msg:
6402
          msg = "disk not found"
6403
        raise errors.OpExecError("Can't find DRBD device %s: %s" %
6404
                                 (name, msg))
6405

    
6406
      if result.payload.is_degraded:
6407
        raise errors.OpExecError("DRBD device %s is degraded!" % name)
6408

    
6409
  def _RemoveOldStorage(self, node_name, iv_names):
6410
    for name, (dev, old_lvs, _) in iv_names.iteritems():
6411
      self.lu.LogInfo("Remove logical volumes for %s" % name)
6412

    
6413
      for lv in old_lvs:
6414
        self.cfg.SetDiskID(lv, node_name)
6415

    
6416
        msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
6417
        if msg:
6418
          self.lu.LogWarning("Can't remove old LV: %s" % msg,
6419
                             hint="remove unused LVs manually")
6420

    
6421
  def _ExecDrbd8DiskOnly(self):
6422
    """Replace a disk on the primary or secondary for DRBD 8.
6423

6424
    The algorithm for replace is quite complicated:
6425

6426
      1. for each disk to be replaced:
6427

6428
        1. create new LVs on the target node with unique names
6429
        1. detach old LVs from the drbd device
6430
        1. rename old LVs to name_replaced.<time_t>
6431
        1. rename new LVs to old LVs
6432
        1. attach the new LVs (with the old names now) to the drbd device
6433

6434
      1. wait for sync across all devices
6435

6436
      1. for each modified disk:
6437

6438
        1. remove old LVs (which have the name name_replaces.<time_t>)
6439

6440
    Failures are not very well handled.
6441

6442
    """
6443
    steps_total = 6
6444

    
6445
    # Step: check device activation
6446
    self.lu.LogStep(1, steps_total, "Check device existence")
6447
    self._CheckDisksExistence([self.other_node, self.target_node])
6448
    self._CheckVolumeGroup([self.target_node, self.other_node])
6449

    
6450
    # Step: check other node consistency
6451
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6452
    self._CheckDisksConsistency(self.other_node,
6453
                                self.other_node == self.instance.primary_node,
6454
                                False)
6455

    
6456
    # Step: create new storage
6457
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6458
    iv_names = self._CreateNewStorage(self.target_node)
6459

    
6460
    # Step: for each lv, detach+rename*2+attach
6461
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6462
    for dev, old_lvs, new_lvs in iv_names.itervalues():
6463
      self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
6464

    
6465
      result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
                                                     old_lvs)
6466
      result.Raise("Can't detach drbd from local storage on node"
6467
                   " %s for device %s" % (self.target_node, dev.iv_name))
6468
      #dev.children = []
6469
      #cfg.Update(instance)
6470

    
6471
      # ok, we created the new LVs, so now we know we have the needed
6472
      # storage; as such, we proceed on the target node to rename
6473
      # old_lv to _old, and new_lv to old_lv; note that we rename LVs
6474
      # using the assumption that logical_id == physical_id (which in
6475
      # turn is the unique_id on that node)
6476

    
6477
      # FIXME(iustin): use a better name for the replaced LVs
6478
      temp_suffix = int(time.time())
6479
      ren_fn = lambda d, suff: (d.physical_id[0],
6480
                                d.physical_id[1] + "_replaced-%s" % suff)
6481

    
6482
      # Build the rename list based on what LVs exist on the node
6483
      rename_old_to_new = []
6484
      for to_ren in old_lvs:
6485
        result = self.rpc.call_blockdev_find(self.target_node, to_ren)
6486
        if not result.fail_msg and result.payload:
6487
          # device exists
6488
          rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
6489

    
6490
      self.lu.LogInfo("Renaming the old LVs on the target node")
6491
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_old_to_new)
6492
      result.Raise("Can't rename old LVs on node %s" % self.target_node)
6493

    
6494
      # Now we rename the new LVs to the old LVs
6495
      self.lu.LogInfo("Renaming the new LVs on the target node")
6496
      rename_new_to_old = [(new, old.physical_id)
6497
                           for old, new in zip(old_lvs, new_lvs)]
6498
      result = self.rpc.call_blockdev_rename(self.target_node,
                                             rename_new_to_old)
6499
      result.Raise("Can't rename new LVs on node %s" % self.target_node)
6500

    
6501
      for old, new in zip(old_lvs, new_lvs):
6502
        new.logical_id = old.logical_id
6503
        self.cfg.SetDiskID(new, self.target_node)
6504

    
6505
      for disk in old_lvs:
6506
        disk.logical_id = ren_fn(disk, temp_suffix)
6507
        self.cfg.SetDiskID(disk, self.target_node)
6508

    
6509
      # Now that the new lvs have the old name, we can add them to the device
6510
      self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
6511
      result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
                                                  new_lvs)
6512
      msg = result.fail_msg
6513
      if msg:
6514
        for new_lv in new_lvs:
6515
          msg2 = self.rpc.call_blockdev_remove(self.target_node,
                                               new_lv).fail_msg
6516
          if msg2:
6517
            self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
6518
                               hint=("cleanup manually the unused logical"
6519
                                     "volumes"))
6520
        raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
6521

    
6522
      dev.children = new_lvs
6523

    
6524
      self.cfg.Update(self.instance)
6525

    
6526
    # Wait for sync
6527
    # This can fail as the old devices are degraded and _WaitForSync
6528
    # does a combined result over all disks, so we don't check its return value
6529
    self.lu.LogStep(5, steps_total, "Sync devices")
6530
    _WaitForSync(self.lu, self.instance, unlock=True)
6531

    
6532
    # Check all devices manually
6533
    self._CheckDevices(self.instance.primary_node, iv_names)
6534

    
6535
    # Step: remove old storage
6536
    self.lu.LogStep(6, steps_total, "Removing old storage")
6537
    self._RemoveOldStorage(self.target_node, iv_names)
6538

    
6539
  def _ExecDrbd8Secondary(self):
6540
    """Replace the secondary node for DRBD 8.
6541

6542
    The algorithm for replace is quite complicated:
6543
      - for all disks of the instance:
6544
        - create new LVs on the new node with same names
6545
        - shutdown the drbd device on the old secondary
6546
        - disconnect the drbd network on the primary
6547
        - create the drbd device on the new secondary
6548
        - network attach the drbd on the primary, using an artifice:
6549
          the drbd code for Attach() will connect to the network if it
6550
          finds a device which is connected to the good local disks but
6551
          not network enabled
6552
      - wait for sync across all devices
6553
      - remove all disks from the old secondary
6554

6555
    Failures are not very well handled.
6556

6557
    """
6558
    steps_total = 6
6559

    
6560
    # Step: check device activation
6561
    self.lu.LogStep(1, steps_total, "Check device existence")
6562
    self._CheckDisksExistence([self.instance.primary_node])
6563
    self._CheckVolumeGroup([self.instance.primary_node])
6564

    
6565
    # Step: check other node consistency
6566
    self.lu.LogStep(2, steps_total, "Check peer consistency")
6567
    self._CheckDisksConsistency(self.instance.primary_node, True, True)
6568

    
6569
    # Step: create new storage
6570
    self.lu.LogStep(3, steps_total, "Allocate new storage")
6571
    for idx, dev in enumerate(self.instance.disks):
6572
      self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
6573
                      (self.new_node, idx))
6574
      # we pass force_create=True to force LVM creation
6575
      for new_lv in dev.children:
6576
        _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
6577
                        _GetInstanceInfoText(self.instance), False)
6578

    
6579
    # Step 4: drbd minors and drbd setup changes
6580
    # after this, we must manually remove the drbd minors on both the
6581
    # error and the success paths
6582
    self.lu.LogStep(4, steps_total, "Changing drbd configuration")
6583
    minors = self.cfg.AllocateDRBDMinor([self.new_node
                                         for dev in self.instance.disks],
6584
                                        self.instance.name)
6585
    logging.debug("Allocated minors %r" % (minors,))
6586

    
6587
    iv_names = {}
6588
    for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
6589
      self.lu.LogInfo("activating a new drbd on %s for disk/%d" % (self.new_node, idx))
6590
      # create new devices on new_node; note that we create two IDs:
6591
      # one without port, so the drbd will be activated without
6592
      # networking information on the new node at this stage, and one
6593
      # with network, for the latter activation in step 4
6594
      (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
6595
      if self.instance.primary_node == o_node1:
6596
        p_minor = o_minor1
6597
      else:
6598
        p_minor = o_minor2
6599

    
6600
      new_alone_id = (self.instance.primary_node, self.new_node, None,
                      p_minor, new_minor, o_secret)
6601
      new_net_id = (self.instance.primary_node, self.new_node, o_port,
                    p_minor, new_minor, o_secret)
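      # DRBD8 logical_id layout: (node_a, node_b, port, minor_a, minor_b,
      # secret). new_alone_id carries no port, so the device first comes up
      # without networking; new_net_id is applied later for the re-attach.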
6602

    
6603
      iv_names[idx] = (dev, dev.children, new_net_id)
6604
      logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
6605
                    new_net_id)
6606
      new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
6607
                              logical_id=new_alone_id,
6608
                              children=dev.children,
6609
                              size=dev.size)
6610
      try:
6611
        _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
6612
                              _GetInstanceInfoText(self.instance), False)
6613
      except errors.GenericError:
6614
        self.cfg.ReleaseDRBDMinors(self.instance.name)
6615
        raise
6616

    
6617
    # We have new devices, shutdown the drbd on the old secondary
6618
    for idx, dev in enumerate(self.instance.disks):
6619
      self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
6620
      self.cfg.SetDiskID(dev, self.target_node)
6621
      msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
6622
      if msg:
6623
        self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
6624
                           "node: %s" % (idx, msg),
6625
                           hint=("Please cleanup this device manually as"
6626
                                 " soon as possible"))
6627

    
6628
    self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
6629
    result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
                                               self.node_secondary_ip,
6630
                                               self.instance.disks)\
                                               [self.instance.primary_node]
6631

    
6632
    msg = result.fail_msg
6633
    if msg:
6634
      # detaches didn't succeed (unlikely)
6635
      self.cfg.ReleaseDRBDMinors(self.instance.name)
6636
      raise errors.OpExecError("Can't detach the disks from the network on"
6637
                               " old node: %s" % (msg,))
6638

    
6639
    # if we managed to detach at least one, we update all the disks of
6640
    # the instance to point to the new secondary
6641
    self.lu.LogInfo("Updating instance configuration")
6642
    for dev, _, new_logical_id in iv_names.itervalues():
6643
      dev.logical_id = new_logical_id
6644
      self.cfg.SetDiskID(dev, self.instance.primary_node)
6645

    
6646
    self.cfg.Update(self.instance)
6647

    
6648
    # and now perform the drbd attach
6649
    self.lu.LogInfo("Attaching primary drbds to new secondary"
6650
                    " (standalone => connected)")
6651
    result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
                                            self.new_node],
                                           self.node_secondary_ip,
6652
                                           self.instance.disks,
                                           self.instance.name,
6653
                                           False)
6654
    for to_node, to_result in result.items():
6655
      msg = to_result.fail_msg
6656
      if msg:
6657
        self.lu.LogWarning("Can't attach drbd disks on node %s: %s", to_node, msg,
6658
                           hint=("please do a gnt-instance info to see the"
6659
                                 " status of disks"))
6660

    
6661
    # Wait for sync
6662
    # This can fail as the old devices are degraded and _WaitForSync
6663
    # does a combined result over all disks, so we don't check its return value
6664
    self.lu.LogStep(5, steps_total, "Sync devices")
6665
    _WaitForSync(self.lu, self.instance, unlock=True)
6666

    
6667
    # Check all devices manually
6668
    self._CheckDevices(self.instance.primary_node, iv_names)
6669

    
6670
    # Step: remove old storage
6671
    self.lu.LogStep(6, steps_total, "Removing old storage")
6672
    self._RemoveOldStorage(self.target_node, iv_names)
6673

    
6674

    
6675
class LURepairNodeStorage(NoHooksLU):
6676
  """Repairs the volume group on a node.
6677

6678
  """
6679
  _OP_REQP = ["node_name"]
6680
  REQ_BGL = False
6681

    
6682
  def CheckArguments(self):
6683
    node_name = self.cfg.ExpandNodeName(self.op.node_name)
6684
    if node_name is None:
6685
      raise errors.OpPrereqError("Invalid node name '%s'" % self.op.node_name)
6686

    
6687
    self.op.node_name = node_name
6688

    
6689
  def ExpandNames(self):
6690
    self.needed_locks = {
6691
      locking.LEVEL_NODE: [self.op.node_name],
6692
      }
6693

    
6694
  def _CheckFaultyDisks(self, instance, node_name):
6695
    if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
6696
                                node_name, True):
6697
      raise errors.OpPrereqError("Instance '%s' has faulty disks on"
6698
                                 " node '%s'" % (inst.name, node_name))
6699

    
6700
  def CheckPrereq(self):
6701
    """Check prerequisites.
6702

6703
    """
6704
    storage_type = self.op.storage_type
6705

    
6706
    if (constants.SO_FIX_CONSISTENCY not in
6707
        constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
6708
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
6709
                                 " repaired" % storage_type)
6710

    
6711
    # Check whether any instance on this node has faulty disks
6712
    for inst in _GetNodeInstances(self.cfg, self.op.node_name):
6713
      check_nodes = set(inst.all_nodes)
6714
      check_nodes.discard(self.op.node_name)
6715
      for inst_node_name in check_nodes:
6716
        self._CheckFaultyDisks(inst, inst_node_name)
6717

    
6718
  def Exec(self, feedback_fn):
6719
    feedback_fn("Repairing storage unit '%s' on %s ..." %
6720
                (self.op.name, self.op.node_name))
6721

    
6722
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
6723
    result = self.rpc.call_storage_execute(self.op.node_name,
6724
                                           self.op.storage_type, st_args,
6725
                                           self.op.name,
6726
                                           constants.SO_FIX_CONSISTENCY)
6727
    result.Raise("Failed to repair storage unit '%s' on %s" %
6728
                 (self.op.name, self.op.node_name))
6729

    
6730

    
6731
class LUGrowDisk(LogicalUnit):
6732
  """Grow a disk of an instance.
6733

6734
  """
6735
  HPATH = "disk-grow"
6736
  HTYPE = constants.HTYPE_INSTANCE
6737
  _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
6738
  REQ_BGL = False
6739

    
6740
  def ExpandNames(self):
6741
    self._ExpandAndLockInstance()
6742
    self.needed_locks[locking.LEVEL_NODE] = []
6743
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6744

    
6745
  def DeclareLocks(self, level):
6746
    if level == locking.LEVEL_NODE:
6747
      self._LockInstancesNodes()
6748

    
6749
  def BuildHooksEnv(self):
6750
    """Build hooks env.
6751

6752
    This runs on the master, the primary and all the secondaries.
6753

6754
    """
6755
    env = {
6756
      "DISK": self.op.disk,
6757
      "AMOUNT": self.op.amount,
6758
      }
6759
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6760
    nl = [
6761
      self.cfg.GetMasterNode(),
6762
      self.instance.primary_node,
6763
      ]
6764
    return env, nl, nl
6765

    
6766
  def CheckPrereq(self):
6767
    """Check prerequisites.
6768

6769
    This checks that the instance is in the cluster.
6770

6771
    """
6772
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6773
    assert instance is not None, \
6774
      "Cannot retrieve locked instance %s" % self.op.instance_name
6775
    nodenames = list(instance.all_nodes)
6776
    for node in nodenames:
6777
      _CheckNodeOnline(self, node)
6778

    
6779

    
6780
    self.instance = instance
6781

    
6782
    if instance.disk_template not in (constants.DT_PLAIN, constants.DT_DRBD8):
6783
      raise errors.OpPrereqError("Instance's disk layout does not support"
6784
                                 " growing.")
6785

    
6786
    self.disk = instance.FindDisk(self.op.disk)
6787

    
6788
    nodeinfo = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
6789
                                       instance.hypervisor)
6790
    for node in nodenames:
6791
      info = nodeinfo[node]
6792
      info.Raise("Cannot get current information from node %s" % node)
6793
      vg_free = info.payload.get('vg_free', None)
6794
      if not isinstance(vg_free, int):
6795
        raise errors.OpPrereqError("Can't compute free disk space on"
6796
                                   " node %s" % node)
6797
      if self.op.amount > vg_free:
6798
        raise errors.OpPrereqError("Not enough disk space on target node %s:"
6799
                                   " %d MiB available, %d MiB required" %
6800
                                   (node, vg_free, self.op.amount))
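    # self.op.amount is in MiB and must fit into the volume group on every
    # node holding this disk (primary and, for DRBD, the secondary as well).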
6801

    
6802
  def Exec(self, feedback_fn):
6803
    """Execute disk grow.
6804

6805
    """
6806
    instance = self.instance
6807
    disk = self.disk
6808
    for node in instance.all_nodes:
6809
      self.cfg.SetDiskID(disk, node)
6810
      result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
6811
      result.Raise("Grow request failed to node %s" % node)
6812
    disk.RecordGrow(self.op.amount)
6813
    self.cfg.Update(instance)
6814
    if self.op.wait_for_sync:
6815
      disk_abort = not _WaitForSync(self, instance)
6816
      if disk_abort:
6817
        self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
6818
                             " status.\nPlease check the instance.")
6819

    
6820

    
6821
class LUQueryInstanceData(NoHooksLU):
6822
  """Query runtime instance data.
6823

6824
  """
6825
  _OP_REQP = ["instances", "static"]
6826
  REQ_BGL = False
6827

    
6828
  def ExpandNames(self):
6829
    self.needed_locks = {}
6830
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
6831

    
6832
    if not isinstance(self.op.instances, list):
6833
      raise errors.OpPrereqError("Invalid argument type 'instances'")
6834

    
6835
    if self.op.instances:
6836
      self.wanted_names = []
6837
      for name in self.op.instances:
6838
        full_name = self.cfg.ExpandInstanceName(name)
6839
        if full_name is None:
6840
          raise errors.OpPrereqError("Instance '%s' not known" % name)
6841
        self.wanted_names.append(full_name)
6842
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
6843
    else:
6844
      self.wanted_names = None
6845
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
6846

    
6847
    self.needed_locks[locking.LEVEL_NODE] = []
6848
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6849

    
6850
  def DeclareLocks(self, level):
6851
    if level == locking.LEVEL_NODE:
6852
      self._LockInstancesNodes()
6853

    
6854
  def CheckPrereq(self):
6855
    """Check prerequisites.
6856

6857
    This only checks the optional instance list against the existing names.
6858

6859
    """
6860
    if self.wanted_names is None:
6861
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
6862

    
6863
    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
6864
                             in self.wanted_names]
6865
    return
6866

    
6867
  def _ComputeBlockdevStatus(self, node, instance_name, dev):
6868
    """Returns the status of a block device
6869

6870
    """
6871
    if self.op.static or not node:
6872
      return None
6873

    
6874
    self.cfg.SetDiskID(dev, node)
6875

    
6876
    result = self.rpc.call_blockdev_find(node, dev)
6877
    if result.offline:
6878
      return None
6879

    
6880
    result.Raise("Can't compute disk status for %s" % instance_name)
6881

    
6882
    status = result.payload
6883
    if status is None:
6884
      return None
6885

    
6886
    return (status.dev_path, status.major, status.minor,
6887
            status.sync_percent, status.estimated_time,
6888
            status.is_degraded, status.ldisk_status)
6889

    
6890
  def _ComputeDiskStatus(self, instance, snode, dev):
6891
    """Compute block device status.
6892

6893
    """
6894
    if dev.dev_type in constants.LDS_DRBD:
6895
      # we change the snode then (otherwise we use the one passed in)
6896
      if dev.logical_id[0] == instance.primary_node:
6897
        snode = dev.logical_id[1]
6898
      else:
6899
        snode = dev.logical_id[0]
6900

    
6901
    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
6902
                                              instance.name, dev)
6903
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
6904

    
6905
    if dev.children:
6906
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
6907
                      for child in dev.children]
6908
    else:
6909
      dev_children = []
6910

    
6911
    data = {
6912
      "iv_name": dev.iv_name,
6913
      "dev_type": dev.dev_type,
6914
      "logical_id": dev.logical_id,
6915
      "physical_id": dev.physical_id,
6916
      "pstatus": dev_pstatus,
6917
      "sstatus": dev_sstatus,
6918
      "children": dev_children,
6919
      "mode": dev.mode,
6920
      "size": dev.size,
6921
      }
6922

    
6923
    return data
6924

    
6925
  def Exec(self, feedback_fn):
6926
    """Gather and return data"""
6927
    result = {}
6928

    
6929
    cluster = self.cfg.GetClusterInfo()
6930

    
6931
    for instance in self.wanted_instances:
6932
      if not self.op.static:
6933
        remote_info = self.rpc.call_instance_info(instance.primary_node,
6934
                                                  instance.name,
6935
                                                  instance.hypervisor)
6936
        remote_info.Raise("Error checking node %s" % instance.primary_node)
6937
        remote_info = remote_info.payload
6938
        if remote_info and "state" in remote_info:
6939
          remote_state = "up"
6940
        else:
6941
          remote_state = "down"
6942
      else:
6943
        remote_state = None
6944
      if instance.admin_up:
6945
        config_state = "up"
6946
      else:
6947
        config_state = "down"
6948

    
6949
      disks = [self._ComputeDiskStatus(instance, None, device)
6950
               for device in instance.disks]
6951

    
6952
      idict = {
6953
        "name": instance.name,
6954
        "config_state": config_state,
6955
        "run_state": remote_state,
6956
        "pnode": instance.primary_node,
6957
        "snodes": instance.secondary_nodes,
6958
        "os": instance.os,
6959
        # this happens to be the same format used for hooks
6960
        "nics": _NICListToTuple(self, instance.nics),
6961
        "disks": disks,
6962
        "hypervisor": instance.hypervisor,
6963
        "network_port": instance.network_port,
6964
        "hv_instance": instance.hvparams,
6965
        "hv_actual": cluster.FillHV(instance),
6966
        "be_instance": instance.beparams,
6967
        "be_actual": cluster.FillBE(instance),
6968
        "serial_no": instance.serial_no,
6969
        "mtime": instance.mtime,
6970
        "ctime": instance.ctime,
6971
        }
6972

    
6973
      result[instance.name] = idict
6974

    
6975
    return result
6976

    
6977

    
6978
class LUSetInstanceParams(LogicalUnit):
6979
  """Modifies an instances's parameters.
6980

6981
  """
6982
  HPATH = "instance-modify"
6983
  HTYPE = constants.HTYPE_INSTANCE
6984
  _OP_REQP = ["instance_name"]
6985
  REQ_BGL = False
6986

    
6987
  def CheckArguments(self):
6988
    if not hasattr(self.op, 'nics'):
6989
      self.op.nics = []
6990
    if not hasattr(self.op, 'disks'):
6991
      self.op.disks = []
6992
    if not hasattr(self.op, 'beparams'):
6993
      self.op.beparams = {}
6994
    if not hasattr(self.op, 'hvparams'):
6995
      self.op.hvparams = {}
6996
    self.op.force = getattr(self.op, "force", False)
6997
    if not (self.op.nics or self.op.disks or
6998
            self.op.hvparams or self.op.beparams):
6999
      raise errors.OpPrereqError("No changes submitted")
7000

    
7001
    # Disk validation
7002
    disk_addremove = 0
7003
    for disk_op, disk_dict in self.op.disks:
7004
      if disk_op == constants.DDM_REMOVE:
7005
        disk_addremove += 1
7006
        continue
7007
      elif disk_op == constants.DDM_ADD:
7008
        disk_addremove += 1
7009
      else:
7010
        if not isinstance(disk_op, int):
7011
          raise errors.OpPrereqError("Invalid disk index")
7012
        if not isinstance(disk_dict, dict):
7013
          msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
7014
          raise errors.OpPrereqError(msg)
7015

    
7016
      if disk_op == constants.DDM_ADD:
7017
        mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
7018
        if mode not in constants.DISK_ACCESS_SET:
7019
          raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode)
7020
        size = disk_dict.get('size', None)
7021
        if size is None:
7022
          raise errors.OpPrereqError("Required disk parameter size missing")
7023
        try:
7024
          size = int(size)
7025
        except ValueError, err:
7026
          raise errors.OpPrereqError("Invalid disk size parameter: %s" %
7027
                                     str(err))
7028
        disk_dict['size'] = size
7029
      else:
7030
        # modification of disk
7031
        if 'size' in disk_dict:
7032
          raise errors.OpPrereqError("Disk size change not possible, use"
7033
                                     " grow-disk")
7034

    
7035
    if disk_addremove > 1:
7036
      raise errors.OpPrereqError("Only one disk add or remove operation"
7037
                                 " supported at a time")
7038

    
7039
    # NIC validation
7040
    nic_addremove = 0
7041
    for nic_op, nic_dict in self.op.nics:
7042
      if nic_op == constants.DDM_REMOVE:
7043
        nic_addremove += 1
7044
        continue
7045
      elif nic_op == constants.DDM_ADD:
7046
        nic_addremove += 1
7047
      else:
7048
        if not isinstance(nic_op, int):
7049
          raise errors.OpPrereqError("Invalid nic index")
7050
        if not isinstance(nic_dict, dict):
7051
          msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
7052
          raise errors.OpPrereqError(msg)
7053

    
7054
      # nic_dict should be a dict
7055
      nic_ip = nic_dict.get('ip', None)
7056
      if nic_ip is not None:
7057
        if nic_ip.lower() == constants.VALUE_NONE:
7058
          nic_dict['ip'] = None
7059
        else:
7060
          if not utils.IsValidIP(nic_ip):
7061
            raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip)
7062

    
7063
      nic_bridge = nic_dict.get('bridge', None)
7064
      nic_link = nic_dict.get('link', None)
7065
      if nic_bridge and nic_link:
7066
        raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7067
                                   " at the same time")
7068
      elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
7069
        nic_dict['bridge'] = None
7070
      elif nic_link and nic_link.lower() == constants.VALUE_NONE:
7071
        nic_dict['link'] = None
7072

    
7073
      if nic_op == constants.DDM_ADD:
7074
        nic_mac = nic_dict.get('mac', None)
7075
        if nic_mac is None:
7076
          nic_dict['mac'] = constants.VALUE_AUTO
7077

    
7078
      if 'mac' in nic_dict:
7079
        nic_mac = nic_dict['mac']
7080
        if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7081
          if not utils.IsValidMac(nic_mac):
7082
            raise errors.OpPrereqError("Invalid MAC address %s" % nic_mac)
7083
        if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
7084
          raise errors.OpPrereqError("'auto' is not a valid MAC address when"
7085
                                     " modifying an existing nic")
7086

    
7087
    if nic_addremove > 1:
7088
      raise errors.OpPrereqError("Only one NIC add or remove operation"
7089
                                 " supported at a time")
7090
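
  # Illustrative sketch (not part of the original code): the checks above
  # expect self.op.disks and self.op.nics as lists of (operation, params)
  # pairs, where the operation is constants.DDM_ADD, constants.DDM_REMOVE or
  # the integer index of an existing device.  For example, adding a 1 GiB
  # read-write disk and changing the IP of NIC 0 would look like:
  #
  #   op.disks = [(constants.DDM_ADD,
  #                {'size': 1024, 'mode': constants.DISK_RDWR})]
  #   op.nics = [(0, {'ip': '192.0.2.10'})]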

    
7091
  def ExpandNames(self):
7092
    self._ExpandAndLockInstance()
7093
    self.needed_locks[locking.LEVEL_NODE] = []
7094
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7095

    
7096
  def DeclareLocks(self, level):
7097
    if level == locking.LEVEL_NODE:
7098
      self._LockInstancesNodes()
7099

    
7100
  def BuildHooksEnv(self):
7101
    """Build hooks env.
7102

7103
    This runs on the master, primary and secondaries.
7104

7105
    """
7106
    args = dict()
7107
    if constants.BE_MEMORY in self.be_new:
7108
      args['memory'] = self.be_new[constants.BE_MEMORY]
7109
    if constants.BE_VCPUS in self.be_new:
7110
      args['vcpus'] = self.be_new[constants.BE_VCPUS]
7111
    # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
7112
    # information at all.
7113
    if self.op.nics:
7114
      args['nics'] = []
7115
      nic_override = dict(self.op.nics)
7116
      c_nicparams = self.cluster.nicparams[constants.PP_DEFAULT]
7117
      for idx, nic in enumerate(self.instance.nics):
7118
        if idx in nic_override:
7119
          this_nic_override = nic_override[idx]
7120
        else:
7121
          this_nic_override = {}
7122
        if 'ip' in this_nic_override:
7123
          ip = this_nic_override['ip']
7124
        else:
7125
          ip = nic.ip
7126
        if 'mac' in this_nic_override:
7127
          mac = this_nic_override['mac']
7128
        else:
7129
          mac = nic.mac
7130
        if idx in self.nic_pnew:
7131
          nicparams = self.nic_pnew[idx]
7132
        else:
7133
          nicparams = objects.FillDict(c_nicparams, nic.nicparams)
7134
        mode = nicparams[constants.NIC_MODE]
7135
        link = nicparams[constants.NIC_LINK]
7136
        args['nics'].append((ip, mac, mode, link))
7137
      if constants.DDM_ADD in nic_override:
7138
        ip = nic_override[constants.DDM_ADD].get('ip', None)
7139
        mac = nic_override[constants.DDM_ADD]['mac']
7140
        nicparams = self.nic_pnew[constants.DDM_ADD]
7141
        mode = nicparams[constants.NIC_MODE]
7142
        link = nicparams[constants.NIC_LINK]
7143
        args['nics'].append((ip, mac, mode, link))
7144
      elif constants.DDM_REMOVE in nic_override:
7145
        del args['nics'][-1]
7146

    
7147
    env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
7148
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
7149
    return env, nl, nl
7150

    
7151
  def _GetUpdatedParams(self, old_params, update_dict,
7152
                        default_values, parameter_types):
7153
    """Return the new params dict for the given params.
7154

7155
    @type old_params: dict
7156
    @param old_params: old parameters
7157
    @type update_dict: dict
7158
    @param update_dict: dict containing new parameter values,
7159
                        or constants.VALUE_DEFAULT to reset the
7160
                        parameter to its default value
7161
    @type default_values: dict
7162
    @param default_values: default values for the filled parameters
7163
    @type parameter_types: dict
7164
    @param parameter_types: dict mapping target dict keys to types
7165
                            in constants.ENFORCEABLE_TYPES
7166
    @rtype: (dict, dict)
7167
    @return: (new_parameters, filled_parameters)
7168

7169
    """
7170
    params_copy = copy.deepcopy(old_params)
7171
    for key, val in update_dict.iteritems():
7172
      if val == constants.VALUE_DEFAULT:
7173
        try:
7174
          del params_copy[key]
7175
        except KeyError:
7176
          pass
7177
      else:
7178
        params_copy[key] = val
7179
    utils.ForceDictType(params_copy, parameter_types)
7180
    params_filled = objects.FillDict(default_values, params_copy)
7181
    return (params_copy, params_filled)
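
  # Illustrative example of the semantics above (not part of the original
  # code; the keys are just sample beparams):
  #
  #   old      = {'memory': 256, 'vcpus': 1}
  #   update   = {'memory': constants.VALUE_DEFAULT, 'vcpus': 2}
  #   defaults = {'memory': 128, 'vcpus': 1, 'auto_balance': True}
  #
  # yields new_parameters {'vcpus': 2} (the 'memory' override is dropped so
  # the default applies again) and filled_parameters
  # {'memory': 128, 'vcpus': 2, 'auto_balance': True}.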
7182

    
7183
  def CheckPrereq(self):
7184
    """Check prerequisites.
7185

7186
    This only checks the instance list against the existing names.
7187

7188
    """
7189
    self.force = self.op.force
7190

    
7191
    # checking the new params on the primary/secondary nodes
7192

    
7193
    instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7194
    cluster = self.cluster = self.cfg.GetClusterInfo()
7195
    assert self.instance is not None, \
7196
      "Cannot retrieve locked instance %s" % self.op.instance_name
7197
    pnode = instance.primary_node
7198
    nodelist = list(instance.all_nodes)
7199

    
7200
    # hvparams processing
7201
    if self.op.hvparams:
7202
      i_hvdict, hv_new = self._GetUpdatedParams(
7203
                             instance.hvparams, self.op.hvparams,
7204
                             cluster.hvparams[instance.hypervisor],
7205
                             constants.HVS_PARAMETER_TYPES)
7206
      # local check
7207
      hypervisor.GetHypervisor(
7208
        instance.hypervisor).CheckParameterSyntax(hv_new)
7209
      _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
7210
      self.hv_new = hv_new # the new actual values
7211
      self.hv_inst = i_hvdict # the new dict (without defaults)
7212
    else:
7213
      self.hv_new = self.hv_inst = {}
7214

    
7215
    # beparams processing
7216
    if self.op.beparams:
7217
      i_bedict, be_new = self._GetUpdatedParams(
7218
                             instance.beparams, self.op.beparams,
7219
                             cluster.beparams[constants.PP_DEFAULT],
7220
                             constants.BES_PARAMETER_TYPES)
7221
      self.be_new = be_new # the new actual values
7222
      self.be_inst = i_bedict # the new dict (without defaults)
7223
    else:
7224
      self.be_new = self.be_inst = {}
7225

    
7226
    self.warn = []
7227

    
7228
    if constants.BE_MEMORY in self.op.beparams and not self.force:
7229
      mem_check_list = [pnode]
7230
      if be_new[constants.BE_AUTO_BALANCE]:
7231
        # either we changed auto_balance to yes or it was from before
7232
        mem_check_list.extend(instance.secondary_nodes)
7233
      instance_info = self.rpc.call_instance_info(pnode, instance.name,
7234
                                                  instance.hypervisor)
7235
      nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
7236
                                         instance.hypervisor)
7237
      pninfo = nodeinfo[pnode]
7238
      msg = pninfo.fail_msg
7239
      if msg:
7240
        # Assume the primary node is unreachable and go ahead
7241
        self.warn.append("Can't get info from primary node %s: %s" %
7242
                         (pnode, msg))
7243
      elif not isinstance(pninfo.payload.get('memory_free', None), int):
7244
        self.warn.append("Node data from primary node %s doesn't contain"
7245
                         " free memory information" % pnode)
7246
      elif instance_info.fail_msg:
7247
        self.warn.append("Can't get instance runtime information: %s" %
7248
                        instance_info.fail_msg)
7249
      else:
7250
        if instance_info.payload:
7251
          current_mem = int(instance_info.payload['memory'])
7252
        else:
7253
          # Assume instance not running
7254
          # (there is a slight race condition here, but it's not very probable,
7255
          # and we have no other way to check)
7256
          current_mem = 0
7257
        miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
7258
                    pninfo.payload['memory_free'])
7259
        if miss_mem > 0:
7260
          raise errors.OpPrereqError("This change will prevent the instance"
7261
                                     " from starting, due to %d MB of memory"
7262
                                     " missing on its primary node" % miss_mem)
7263

    
7264
      if be_new[constants.BE_AUTO_BALANCE]:
7265
        for node, nres in nodeinfo.items():
7266
          if node not in instance.secondary_nodes:
7267
            continue
7268
          msg = nres.fail_msg
7269
          if msg:
7270
            self.warn.append("Can't get info from secondary node %s: %s" %
7271
                             (node, msg))
7272
          elif not isinstance(nres.payload.get('memory_free', None), int):
7273
            self.warn.append("Secondary node %s didn't return free"
7274
                             " memory information" % node)
7275
          elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
7276
            self.warn.append("Not enough memory to failover instance to"
7277
                             " secondary node %s" % node)
7278

    
7279
    # NIC processing
7280
    self.nic_pnew = {}
7281
    self.nic_pinst = {}
7282
    for nic_op, nic_dict in self.op.nics:
7283
      if nic_op == constants.DDM_REMOVE:
7284
        if not instance.nics:
7285
          raise errors.OpPrereqError("Instance has no NICs, cannot remove")
7286
        continue
7287
      if nic_op != constants.DDM_ADD:
7288
        # an existing nic
7289
        if nic_op < 0 or nic_op >= len(instance.nics):
7290
          raise errors.OpPrereqError("Invalid NIC index %s, valid values"
7291
                                     " are 0 to %d" %
7292
                                     (nic_op, len(instance.nics)))
7293
        old_nic_params = instance.nics[nic_op].nicparams
7294
        old_nic_ip = instance.nics[nic_op].ip
7295
      else:
7296
        old_nic_params = {}
7297
        old_nic_ip = None
7298

    
7299
      update_params_dict = dict([(key, nic_dict[key])
7300
                                 for key in constants.NICS_PARAMETERS
7301
                                 if key in nic_dict])
7302

    
7303
      if 'bridge' in nic_dict:
7304
        update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
7305

    
7306
      new_nic_params, new_filled_nic_params = \
7307
          self._GetUpdatedParams(old_nic_params, update_params_dict,
7308
                                 cluster.nicparams[constants.PP_DEFAULT],
7309
                                 constants.NICS_PARAMETER_TYPES)
7310
      objects.NIC.CheckParameterSyntax(new_filled_nic_params)
7311
      self.nic_pinst[nic_op] = new_nic_params
7312
      self.nic_pnew[nic_op] = new_filled_nic_params
7313
      new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
7314

    
7315
      if new_nic_mode == constants.NIC_MODE_BRIDGED:
7316
        nic_bridge = new_filled_nic_params[constants.NIC_LINK]
7317
        msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
7318
        if msg:
7319
          msg = "Error checking bridges on node %s: %s" % (pnode, msg)
7320
          if self.force:
7321
            self.warn.append(msg)
7322
          else:
7323
            raise errors.OpPrereqError(msg)
7324
      if new_nic_mode == constants.NIC_MODE_ROUTED:
7325
        if 'ip' in nic_dict:
7326
          nic_ip = nic_dict['ip']
7327
        else:
7328
          nic_ip = old_nic_ip
7329
        if nic_ip is None:
7330
          raise errors.OpPrereqError('Cannot set the nic ip to None'
7331
                                     ' on a routed nic')
7332
      if 'mac' in nic_dict:
7333
        nic_mac = nic_dict['mac']
7334
        if nic_mac is None:
7335
          raise errors.OpPrereqError('Cannot set the nic mac to None')
7336
        elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7337
          # otherwise generate the mac
7338
          nic_dict['mac'] = self.cfg.GenerateMAC()
7339
        else:
7340
          # or validate/reserve the current one
7341
          if self.cfg.IsMacInUse(nic_mac):
7342
            raise errors.OpPrereqError("MAC address %s already in use"
7343
                                       " in cluster" % nic_mac)
7344

    
7345
    # DISK processing
7346
    if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
7347
      raise errors.OpPrereqError("Disk operations not supported for"
7348
                                 " diskless instances")
7349
    for disk_op, disk_dict in self.op.disks:
7350
      if disk_op == constants.DDM_REMOVE:
7351
        if len(instance.disks) == 1:
7352
          raise errors.OpPrereqError("Cannot remove the last disk of"
7353
                                     " an instance")
7354
        ins_l = self.rpc.call_instance_list([pnode], [instance.hypervisor])
7355
        ins_l = ins_l[pnode]
7356
        msg = ins_l.fail_msg
7357
        if msg:
7358
          raise errors.OpPrereqError("Can't contact node %s: %s" %
7359
                                     (pnode, msg))
7360
        if instance.name in ins_l.payload:
7361
          raise errors.OpPrereqError("Instance is running, can't remove"
7362
                                     " disks.")
7363

    
7364
      if (disk_op == constants.DDM_ADD and
7365
          len(instance.disks) >= constants.MAX_DISKS):
7366
        raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
7367
                                   " add more" % constants.MAX_DISKS)
7368
      if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
7369
        # an existing disk
7370
        if disk_op < 0 or disk_op >= len(instance.disks):
7371
          raise errors.OpPrereqError("Invalid disk index %s, valid values"
7372
                                     " are 0 to %d" %
7373
                                     (disk_op, len(instance.disks)))
7374

    
7375
    return
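
  # Worked example of the memory check above (figures are illustrative only):
  # with be_new[constants.BE_MEMORY] = 2048, a running instance currently
  # using current_mem = 512 and the primary node reporting
  # memory_free = 1024, the shortfall is
  #
  #   miss_mem = 2048 - 512 - 1024 = 512
  #
  # which is greater than zero, so the change is refused unless the
  # operation is forced.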
7376

    
7377
  def Exec(self, feedback_fn):
7378
    """Modifies an instance.
7379

7380
    All parameters take effect only at the next restart of the instance.
7381

7382
    """
7383
    # Process here the warnings from CheckPrereq, as we don't have a
7384
    # feedback_fn there.
7385
    for warn in self.warn:
7386
      feedback_fn("WARNING: %s" % warn)
7387

    
7388
    result = []
7389
    instance = self.instance
7390
    cluster = self.cluster
7391
    # disk changes
7392
    for disk_op, disk_dict in self.op.disks:
7393
      if disk_op == constants.DDM_REMOVE:
7394
        # remove the last disk
7395
        device = instance.disks.pop()
7396
        device_idx = len(instance.disks)
7397
        for node, disk in device.ComputeNodeTree(instance.primary_node):
7398
          self.cfg.SetDiskID(disk, node)
7399
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
7400
          if msg:
7401
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
7402
                            " continuing anyway", device_idx, node, msg)
7403
        result.append(("disk/%d" % device_idx, "remove"))
7404
      elif disk_op == constants.DDM_ADD:
7405
        # add a new disk
7406
        if instance.disk_template == constants.DT_FILE:
7407
          file_driver, file_path = instance.disks[0].logical_id
7408
          file_path = os.path.dirname(file_path)
7409
        else:
7410
          file_driver = file_path = None
7411
        disk_idx_base = len(instance.disks)
7412
        new_disk = _GenerateDiskTemplate(self,
7413
                                         instance.disk_template,
7414
                                         instance.name, instance.primary_node,
7415
                                         instance.secondary_nodes,
7416
                                         [disk_dict],
7417
                                         file_path,
7418
                                         file_driver,
7419
                                         disk_idx_base)[0]
7420
        instance.disks.append(new_disk)
7421
        info = _GetInstanceInfoText(instance)
7422

    
7423
        logging.info("Creating volume %s for instance %s",
7424
                     new_disk.iv_name, instance.name)
7425
        # Note: this needs to be kept in sync with _CreateDisks
7426
        #HARDCODE
7427
        for node in instance.all_nodes:
7428
          f_create = node == instance.primary_node
7429
          try:
7430
            _CreateBlockDev(self, node, instance, new_disk,
7431
                            f_create, info, f_create)
7432
          except errors.OpExecError, err:
7433
            self.LogWarning("Failed to create volume %s (%s) on"
7434
                            " node %s: %s",
7435
                            new_disk.iv_name, new_disk, node, err)
7436
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
7437
                       (new_disk.size, new_disk.mode)))
7438
      else:
7439
        # change a given disk
7440
        instance.disks[disk_op].mode = disk_dict['mode']
7441
        result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
7442
    # NIC changes
7443
    for nic_op, nic_dict in self.op.nics:
7444
      if nic_op == constants.DDM_REMOVE:
7445
        # remove the last nic
7446
        del instance.nics[-1]
7447
        result.append(("nic.%d" % len(instance.nics), "remove"))
7448
      elif nic_op == constants.DDM_ADD:
7449
        # mac and bridge should be set by now
7450
        mac = nic_dict['mac']
7451
        ip = nic_dict.get('ip', None)
7452
        nicparams = self.nic_pinst[constants.DDM_ADD]
7453
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
7454
        instance.nics.append(new_nic)
7455
        result.append(("nic.%d" % (len(instance.nics) - 1),
7456
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
7457
                       (new_nic.mac, new_nic.ip,
7458
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
7459
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
7460
                       )))
7461
      else:
7462
        for key in 'mac', 'ip':
7463
          if key in nic_dict:
7464
            setattr(instance.nics[nic_op], key, nic_dict[key])
7465
        if nic_op in self.nic_pnew:
7466
          instance.nics[nic_op].nicparams = self.nic_pnew[nic_op]
7467
        for key, val in nic_dict.iteritems():
7468
          result.append(("nic.%s/%d" % (key, nic_op), val))
7469

    
7470
    # hvparams changes
7471
    if self.op.hvparams:
7472
      instance.hvparams = self.hv_inst
7473
      for key, val in self.op.hvparams.iteritems():
7474
        result.append(("hv/%s" % key, val))
7475

    
7476
    # beparams changes
7477
    if self.op.beparams:
7478
      instance.beparams = self.be_inst
7479
      for key, val in self.op.beparams.iteritems():
7480
        result.append(("be/%s" % key, val))
7481

    
7482
    self.cfg.Update(instance)
7483

    
7484
    return result
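
# Illustrative sketch (not part of the original code): Exec above returns a
# list of (parameter, new value) pairs describing what was changed, e.g. for
# a disk add plus a beparams change:
#
#   [("disk/1", "add:size=1024,mode=rw"),
#    ("be/memory", 512)]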
7485

    
7486

    
7487
class LUQueryExports(NoHooksLU):
7488
  """Query the exports list
7489

7490
  """
7491
  _OP_REQP = ['nodes']
7492
  REQ_BGL = False
7493

    
7494
  def ExpandNames(self):
7495
    self.needed_locks = {}
7496
    self.share_locks[locking.LEVEL_NODE] = 1
7497
    if not self.op.nodes:
7498
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7499
    else:
7500
      self.needed_locks[locking.LEVEL_NODE] = \
7501
        _GetWantedNodes(self, self.op.nodes)
7502

    
7503
  def CheckPrereq(self):
7504
    """Check prerequisites.
7505

7506
    """
7507
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
7508

    
7509
  def Exec(self, feedback_fn):
7510
    """Compute the list of all the exported system images.
7511

7512
    @rtype: dict
7513
    @return: a dictionary with the structure node->(export-list)
7514
        where export-list is a list of the instances exported on
7515
        that node.
7516

7517
    """
7518
    rpcresult = self.rpc.call_export_list(self.nodes)
7519
    result = {}
7520
    for node in rpcresult:
7521
      if rpcresult[node].fail_msg:
7522
        result[node] = False
7523
      else:
7524
        result[node] = rpcresult[node].payload
7525

    
7526
    return result
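
# Illustrative sketch (not part of the original code): the dictionary built
# above maps each queried node either to its list of exports or to False if
# the node could not be contacted, e.g.:
#
#   {'node1.example.com': ['web1.example.com', 'db1.example.com'],
#    'node2.example.com': False}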
7527

    
7528

    
7529
class LUExportInstance(LogicalUnit):
7530
  """Export an instance to an image in the cluster.
7531

7532
  """
7533
  HPATH = "instance-export"
7534
  HTYPE = constants.HTYPE_INSTANCE
7535
  _OP_REQP = ["instance_name", "target_node", "shutdown"]
7536
  REQ_BGL = False
7537

    
7538
  def ExpandNames(self):
7539
    self._ExpandAndLockInstance()
7540
    # FIXME: lock only instance primary and destination node
7541
    #
7542
    # Sad but true, for now we have to lock all nodes, as we don't know where
7543
    # the previous export might be, and in this LU we search for it and
7544
    # remove it from its current node. In the future we could fix this by:
7545
    #  - making a tasklet to search (share-lock all), then create the new one,
7546
    #    then one to remove, after
7547
    #  - removing the removal operation altogether
7548
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7549

    
7550
  def DeclareLocks(self, level):
7551
    """Last minute lock declaration."""
7552
    # All nodes are locked anyway, so nothing to do here.
7553

    
7554
  def BuildHooksEnv(self):
7555
    """Build hooks env.
7556

7557
    This will run on the master, primary node and target node.
7558

7559
    """
7560
    env = {
7561
      "EXPORT_NODE": self.op.target_node,
7562
      "EXPORT_DO_SHUTDOWN": self.op.shutdown,
7563
      }
7564
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
7565
    nl = [self.cfg.GetMasterNode(), self.instance.primary_node,
7566
          self.op.target_node]
7567
    return env, nl, nl
7568

    
7569
  def CheckPrereq(self):
7570
    """Check prerequisites.
7571

7572
    This checks that the instance and node names are valid.
7573

7574
    """
7575
    instance_name = self.op.instance_name
7576
    self.instance = self.cfg.GetInstanceInfo(instance_name)
7577
    assert self.instance is not None, \
7578
          "Cannot retrieve locked instance %s" % self.op.instance_name
7579
    _CheckNodeOnline(self, self.instance.primary_node)
7580

    
7581
    self.dst_node = self.cfg.GetNodeInfo(
7582
      self.cfg.ExpandNodeName(self.op.target_node))
7583

    
7584
    if self.dst_node is None:
7585
      # This is a wrong node name, not a non-locked node
7586
      raise errors.OpPrereqError("Wrong node name %s" % self.op.target_node)
7587
    _CheckNodeOnline(self, self.dst_node.name)
7588
    _CheckNodeNotDrained(self, self.dst_node.name)
7589

    
7590
    # instance disk type verification
7591
    for disk in self.instance.disks:
7592
      if disk.dev_type == constants.LD_FILE:
7593
        raise errors.OpPrereqError("Export not supported for instances with"
7594
                                   " file-based disks")
7595

    
7596
  def Exec(self, feedback_fn):
7597
    """Export an instance to an image in the cluster.
7598

7599
    """
7600
    instance = self.instance
7601
    dst_node = self.dst_node
7602
    src_node = instance.primary_node
7603

    
7604
    if self.op.shutdown:
7605
      # shutdown the instance, but not the disks
7606
      feedback_fn("Shutting down instance %s" % instance.name)
7607
      result = self.rpc.call_instance_shutdown(src_node, instance)
7608
      result.Raise("Could not shutdown instance %s on"
7609
                   " node %s" % (instance.name, src_node))
7610

    
7611
    vgname = self.cfg.GetVGName()
7612

    
7613
    snap_disks = []
7614

    
7615
    # set the disks ID correctly since call_instance_start needs the
7616
    # correct drbd minor to create the symlinks
7617
    for disk in instance.disks:
7618
      self.cfg.SetDiskID(disk, src_node)
7619

    
7620
    # per-disk results
7621
    dresults = []
7622
    try:
7623
      for idx, disk in enumerate(instance.disks):
7624
        feedback_fn("Creating a snapshot of disk/%s on node %s" %
7625
                    (idx, src_node))
7626

    
7627
        # result.payload will be a snapshot of an lvm leaf of the one we passed
7628
        result = self.rpc.call_blockdev_snapshot(src_node, disk)
7629
        msg = result.fail_msg
7630
        if msg:
7631
          self.LogWarning("Could not snapshot disk/%s on node %s: %s",
7632
                          idx, src_node, msg)
7633
          snap_disks.append(False)
7634
        else:
7635
          disk_id = (vgname, result.payload)
7636
          new_dev = objects.Disk(dev_type=constants.LD_LV, size=disk.size,
7637
                                 logical_id=disk_id, physical_id=disk_id,
7638
                                 iv_name=disk.iv_name)
7639
          snap_disks.append(new_dev)
7640

    
7641
    finally:
7642
      if self.op.shutdown and instance.admin_up:
7643
        feedback_fn("Starting instance %s" % instance.name)
7644
        result = self.rpc.call_instance_start(src_node, instance, None, None)
7645
        msg = result.fail_msg
7646
        if msg:
7647
          _ShutdownInstanceDisks(self, instance)
7648
          raise errors.OpExecError("Could not start instance: %s" % msg)
7649

    
7650
    # TODO: check for size
7651

    
7652
    cluster_name = self.cfg.GetClusterName()
7653
    for idx, dev in enumerate(snap_disks):
7654
      feedback_fn("Exporting snapshot %s from %s to %s" %
7655
                  (idx, src_node, dst_node.name))
7656
      if dev:
7657
        result = self.rpc.call_snapshot_export(src_node, dev, dst_node.name,
7658
                                               instance, cluster_name, idx)
7659
        msg = result.fail_msg
7660
        if msg:
7661
          self.LogWarning("Could not export disk/%s from node %s to"
7662
                          " node %s: %s", idx, src_node, dst_node.name, msg)
7663
          dresults.append(False)
7664
        else:
7665
          dresults.append(True)
7666
        msg = self.rpc.call_blockdev_remove(src_node, dev).fail_msg
7667
        if msg:
7668
          self.LogWarning("Could not remove snapshot for disk/%d from node"
7669
                          " %s: %s", idx, src_node, msg)
7670
      else:
7671
        dresults.append(False)
7672

    
7673
    feedback_fn("Finalizing export on %s" % dst_node.name)
7674
    result = self.rpc.call_finalize_export(dst_node.name, instance, snap_disks)
7675
    fin_resu = True
7676
    msg = result.fail_msg
7677
    if msg:
7678
      self.LogWarning("Could not finalize export for instance %s"
7679
                      " on node %s: %s", instance.name, dst_node.name, msg)
7680
      fin_resu = False
7681

    
7682
    nodelist = self.cfg.GetNodeList()
7683
    nodelist.remove(dst_node.name)
7684

    
7685
    # on one-node clusters nodelist will be empty after the removal
7686
    # if we proceed the backup would be removed because OpQueryExports
7687
    # substitutes an empty list with the full cluster node list.
7688
    iname = instance.name
7689
    if nodelist:
7690
      feedback_fn("Removing old exports for instance %s" % iname)
7691
      exportlist = self.rpc.call_export_list(nodelist)
7692
      for node in exportlist:
7693
        if exportlist[node].fail_msg:
7694
          continue
7695
        if iname in exportlist[node].payload:
7696
          msg = self.rpc.call_export_remove(node, iname).fail_msg
7697
          if msg:
7698
            self.LogWarning("Could not remove older export for instance %s"
7699
                            " on node %s: %s", iname, node, msg)
7700
    return fin_resu, dresults
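
# Illustrative sketch (not part of the original code): the Exec method above
# returns a (finalization status, per-disk status list) pair, e.g.
#
#   (True, [True, False])
#
# meaning the export was finalized on the target node but the snapshot of
# disk/1 could not be transferred.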
7701

    
7702

    
7703
class LURemoveExport(NoHooksLU):
7704
  """Remove exports related to the named instance.
7705

7706
  """
7707
  _OP_REQP = ["instance_name"]
7708
  REQ_BGL = False
7709

    
7710
  def ExpandNames(self):
7711
    self.needed_locks = {}
7712
    # We need all nodes to be locked in order for RemoveExport to work, but we
7713
    # don't need to lock the instance itself, as nothing will happen to it (and
7714
    # we can remove exports also for a removed instance)
7715
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7716

    
7717
  def CheckPrereq(self):
7718
    """Check prerequisites.
7719
    """
7720
    pass
7721

    
7722
  def Exec(self, feedback_fn):
7723
    """Remove any export.
7724

7725
    """
7726
    instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
7727
    # If the instance was not found we'll try with the name that was passed in.
7728
    # This will only work if it was an FQDN, though.
7729
    fqdn_warn = False
7730
    if not instance_name:
7731
      fqdn_warn = True
7732
      instance_name = self.op.instance_name
7733

    
7734
    locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7735
    exportlist = self.rpc.call_export_list(locked_nodes)
7736
    found = False
7737
    for node in exportlist:
7738
      msg = exportlist[node].fail_msg
7739
      if msg:
7740
        self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
7741
        continue
7742
      if instance_name in exportlist[node].payload:
7743
        found = True
7744
        result = self.rpc.call_export_remove(node, instance_name)
7745
        msg = result.fail_msg
7746
        if msg:
7747
          logging.error("Could not remove export for instance %s"
7748
                        " on node %s: %s", instance_name, node, msg)
7749

    
7750
    if fqdn_warn and not found:
7751
      feedback_fn("Export not found. If trying to remove an export belonging"
7752
                  " to a deleted instance please use its Fully Qualified"
7753
                  " Domain Name.")
7754

    
7755

    
7756
class TagsLU(NoHooksLU):
7757
  """Generic tags LU.
7758

7759
  This is an abstract class which is the parent of all the other tags LUs.
7760

7761
  """
7762

    
7763
  def ExpandNames(self):
7764
    self.needed_locks = {}
7765
    if self.op.kind == constants.TAG_NODE:
7766
      name = self.cfg.ExpandNodeName(self.op.name)
7767
      if name is None:
7768
        raise errors.OpPrereqError("Invalid node name (%s)" %
7769
                                   (self.op.name,))
7770
      self.op.name = name
7771
      self.needed_locks[locking.LEVEL_NODE] = name
7772
    elif self.op.kind == constants.TAG_INSTANCE:
7773
      name = self.cfg.ExpandInstanceName(self.op.name)
7774
      if name is None:
7775
        raise errors.OpPrereqError("Invalid instance name (%s)" %
7776
                                   (self.op.name,))
7777
      self.op.name = name
7778
      self.needed_locks[locking.LEVEL_INSTANCE] = name
7779

    
7780
  def CheckPrereq(self):
7781
    """Check prerequisites.
7782

7783
    """
7784
    if self.op.kind == constants.TAG_CLUSTER:
7785
      self.target = self.cfg.GetClusterInfo()
7786
    elif self.op.kind == constants.TAG_NODE:
7787
      self.target = self.cfg.GetNodeInfo(self.op.name)
7788
    elif self.op.kind == constants.TAG_INSTANCE:
7789
      self.target = self.cfg.GetInstanceInfo(self.op.name)
7790
    else:
7791
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
7792
                                 str(self.op.kind))
7793

    
7794

    
7795
class LUGetTags(TagsLU):
7796
  """Returns the tags of a given object.
7797

7798
  """
7799
  _OP_REQP = ["kind", "name"]
7800
  REQ_BGL = False
7801

    
7802
  def Exec(self, feedback_fn):
7803
    """Returns the tag list.
7804

7805
    """
7806
    return list(self.target.GetTags())
7807

    
7808

    
7809
class LUSearchTags(NoHooksLU):
7810
  """Searches the tags for a given pattern.
7811

7812
  """
7813
  _OP_REQP = ["pattern"]
7814
  REQ_BGL = False
7815

    
7816
  def ExpandNames(self):
7817
    self.needed_locks = {}
7818

    
7819
  def CheckPrereq(self):
7820
    """Check prerequisites.
7821

7822
    This checks the pattern passed for validity by compiling it.
7823

7824
    """
7825
    try:
7826
      self.re = re.compile(self.op.pattern)
7827
    except re.error, err:
7828
      raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
7829
                                 (self.op.pattern, err))
7830

    
7831
  def Exec(self, feedback_fn):
7832
    """Returns the tag list.
7833

7834
    """
7835
    cfg = self.cfg
7836
    tgts = [("/cluster", cfg.GetClusterInfo())]
7837
    ilist = cfg.GetAllInstancesInfo().values()
7838
    tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
7839
    nlist = cfg.GetAllNodesInfo().values()
7840
    tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
7841
    results = []
7842
    for path, target in tgts:
7843
      for tag in target.GetTags():
7844
        if self.re.search(tag):
7845
          results.append((path, tag))
7846
    return results
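
# Illustrative sketch (not part of the original code; names are invented):
# searching for the pattern '^prod' could return (path, tag) pairs such as
#
#   [('/cluster', 'production'),
#    ('/instances/web1.example.com', 'prod-frontend')]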
7847

    
7848

    
7849
class LUAddTags(TagsLU):
7850
  """Sets a tag on a given object.
7851

7852
  """
7853
  _OP_REQP = ["kind", "name", "tags"]
7854
  REQ_BGL = False
7855

    
7856
  def CheckPrereq(self):
7857
    """Check prerequisites.
7858

7859
    This checks the type and length of the tag name and value.
7860

7861
    """
7862
    TagsLU.CheckPrereq(self)
7863
    for tag in self.op.tags:
7864
      objects.TaggableObject.ValidateTag(tag)
7865

    
7866
  def Exec(self, feedback_fn):
7867
    """Sets the tag.
7868

7869
    """
7870
    try:
7871
      for tag in self.op.tags:
7872
        self.target.AddTag(tag)
7873
    except errors.TagError, err:
7874
      raise errors.OpExecError("Error while setting tag: %s" % str(err))
7875
    try:
7876
      self.cfg.Update(self.target)
7877
    except errors.ConfigurationError:
7878
      raise errors.OpRetryError("There has been a modification to the"
7879
                                " config file and the operation has been"
7880
                                " aborted. Please retry.")
7881

    
7882

    
7883
class LUDelTags(TagsLU):
7884
  """Delete a list of tags from a given object.
7885

7886
  """
7887
  _OP_REQP = ["kind", "name", "tags"]
7888
  REQ_BGL = False
7889

    
7890
  def CheckPrereq(self):
7891
    """Check prerequisites.
7892

7893
    This checks that we have the given tag.
7894

7895
    """
7896
    TagsLU.CheckPrereq(self)
7897
    for tag in self.op.tags:
7898
      objects.TaggableObject.ValidateTag(tag)
7899
    del_tags = frozenset(self.op.tags)
7900
    cur_tags = self.target.GetTags()
7901
    if not del_tags <= cur_tags:
7902
      diff_tags = del_tags - cur_tags
7903
      diff_names = ["'%s'" % tag for tag in diff_tags]
7904
      diff_names.sort()
7905
      raise errors.OpPrereqError("Tag(s) %s not found" %
7906
                                 (",".join(diff_names)))
7907
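
  # Illustrative example of the check above (not part of the original code):
  # with cur_tags = set(['prod', 'web']) and op.tags = ['web', 'staging'],
  # del_tags <= cur_tags is False and the difference set(['staging']) is
  # reported back as "Tag(s) 'staging' not found".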

    
7908
  def Exec(self, feedback_fn):
7909
    """Remove the tag from the object.
7910

7911
    """
7912
    for tag in self.op.tags:
7913
      self.target.RemoveTag(tag)
7914
    try:
7915
      self.cfg.Update(self.target)
7916
    except errors.ConfigurationError:
7917
      raise errors.OpRetryError("There has been a modification to the"
7918
                                " config file and the operation has been"
7919
                                " aborted. Please retry.")
7920

    
7921

    
7922
class LUTestDelay(NoHooksLU):
7923
  """Sleep for a specified amount of time.
7924

7925
  This LU sleeps on the master and/or nodes for a specified amount of
7926
  time.
7927

7928
  """
7929
  _OP_REQP = ["duration", "on_master", "on_nodes"]
7930
  REQ_BGL = False
7931

    
7932
  def ExpandNames(self):
7933
    """Expand names and set required locks.
7934

7935
    This expands the node list, if any.
7936

7937
    """
7938
    self.needed_locks = {}
7939
    if self.op.on_nodes:
7940
      # _GetWantedNodes can be used here, but is not always appropriate to use
7941
      # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
7942
      # more information.
7943
      self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
7944
      self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
7945

    
7946
  def CheckPrereq(self):
7947
    """Check prerequisites.
7948

7949
    """
7950

    
7951
  def Exec(self, feedback_fn):
7952
    """Do the actual sleep.
7953

7954
    """
7955
    if self.op.on_master:
7956
      if not utils.TestDelay(self.op.duration):
7957
        raise errors.OpExecError("Error during master delay test")
7958
    if self.op.on_nodes:
7959
      result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
7960
      for node, node_result in result.items():
7961
        node_result.Raise("Failure during rpc call to node %s" % node)
7962

    
7963

    
7964
class IAllocator(object):
7965
  """IAllocator framework.
7966

7967
  An IAllocator instance has several sets of attributes:
7968
    - cfg that is needed to query the cluster
7969
    - input data (all members of the _ALLO_KEYS or _RELO_KEYS class
      attributes, depending on the mode, are required)
7970
    - four buffer attributes (in|out_data|text), that represent the
7971
      input (to the external script) in text and data structure format,
7972
      and the output from it, again in two formats
7973
    - the result variables from the script (success, info, nodes) for
7974
      easy usage
7975

7976
  """
7977
  _ALLO_KEYS = [
7978
    "mem_size", "disks", "disk_template",
7979
    "os", "tags", "nics", "vcpus", "hypervisor",
7980
    ]
7981
  _RELO_KEYS = [
7982
    "relocate_from",
7983
    ]
7984

    
7985
  def __init__(self, cfg, rpc, mode, name, **kwargs):
7986
    self.cfg = cfg
7987
    self.rpc = rpc
7988
    # init buffer variables
7989
    self.in_text = self.out_text = self.in_data = self.out_data = None
7990
    # init all input fields so that pylint is happy
7991
    self.mode = mode
7992
    self.name = name
7993
    self.mem_size = self.disks = self.disk_template = None
7994
    self.os = self.tags = self.nics = self.vcpus = None
7995
    self.hypervisor = None
7996
    self.relocate_from = None
7997
    # computed fields
7998
    self.required_nodes = None
7999
    # init result fields
8000
    self.success = self.info = self.nodes = None
8001
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8002
      keyset = self._ALLO_KEYS
8003
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8004
      keyset = self._RELO_KEYS
8005
    else:
8006
      raise errors.ProgrammerError("Unknown mode '%s' passed to the"
8007
                                   " IAllocator" % self.mode)
8008
    for key in kwargs:
8009
      if key not in keyset:
8010
        raise errors.ProgrammerError("Invalid input parameter '%s' to"
8011
                                     " IAllocator" % key)
8012
      setattr(self, key, kwargs[key])
8013
    for key in keyset:
8014
      if key not in kwargs:
8015
        raise errors.ProgrammerError("Missing input parameter '%s' to"
8016
                                     " IAllocator" % key)
8017
    self._BuildInputData()
8018

    
8019
  def _ComputeClusterData(self):
8020
    """Compute the generic allocator input data.
8021

8022
    This is the data that is independent of the actual operation.
8023

8024
    """
8025
    cfg = self.cfg
8026
    cluster_info = cfg.GetClusterInfo()
8027
    # cluster data
8028
    data = {
8029
      "version": constants.IALLOCATOR_VERSION,
8030
      "cluster_name": cfg.GetClusterName(),
8031
      "cluster_tags": list(cluster_info.GetTags()),
8032
      "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
8033
      # we don't have job IDs
8034
      }
8035
    iinfo = cfg.GetAllInstancesInfo().values()
8036
    i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
8037

    
8038
    # node data
8039
    node_results = {}
8040
    node_list = cfg.GetNodeList()
8041

    
8042
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8043
      hypervisor_name = self.hypervisor
8044
    elif self.mode == constants.IALLOCATOR_MODE_RELOC:
8045
      hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
8046

    
8047
    node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
8048
                                        hypervisor_name)
8049
    node_iinfo = \
8050
      self.rpc.call_all_instances_info(node_list,
8051
                                       cluster_info.enabled_hypervisors)
8052
    for nname, nresult in node_data.items():
8053
      # first fill in static (config-based) values
8054
      ninfo = cfg.GetNodeInfo(nname)
8055
      pnr = {
8056
        "tags": list(ninfo.GetTags()),
8057
        "primary_ip": ninfo.primary_ip,
8058
        "secondary_ip": ninfo.secondary_ip,
8059
        "offline": ninfo.offline,
8060
        "drained": ninfo.drained,
8061
        "master_candidate": ninfo.master_candidate,
8062
        }
8063

    
8064
      if not (ninfo.offline or ninfo.drained):
8065
        nresult.Raise("Can't get data for node %s" % nname)
8066
        node_iinfo[nname].Raise("Can't get node instance info from node %s" %
8067
                                nname)
8068
        remote_info = nresult.payload
8069

    
8070
        for attr in ['memory_total', 'memory_free', 'memory_dom0',
8071
                     'vg_size', 'vg_free', 'cpu_total']:
8072
          if attr not in remote_info:
8073
            raise errors.OpExecError("Node '%s' didn't return attribute"
8074
                                     " '%s'" % (nname, attr))
8075
          if not isinstance(remote_info[attr], int):
8076
            raise errors.OpExecError("Node '%s' returned invalid value"
8077
                                     " for '%s': %s" %
8078
                                     (nname, attr, remote_info[attr]))
8079
        # compute memory used by primary instances
8080
        i_p_mem = i_p_up_mem = 0
8081
        for iinfo, beinfo in i_list:
8082
          if iinfo.primary_node == nname:
8083
            i_p_mem += beinfo[constants.BE_MEMORY]
8084
            if iinfo.name not in node_iinfo[nname].payload:
8085
              i_used_mem = 0
8086
            else:
8087
              i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
8088
            i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
8089
            remote_info['memory_free'] -= max(0, i_mem_diff)
8090

    
8091
            if iinfo.admin_up:
8092
              i_p_up_mem += beinfo[constants.BE_MEMORY]
8093

    
8094
        # compute memory used by instances
8095
        pnr_dyn = {
8096
          "total_memory": remote_info['memory_total'],
8097
          "reserved_memory": remote_info['memory_dom0'],
8098
          "free_memory": remote_info['memory_free'],
8099
          "total_disk": remote_info['vg_size'],
8100
          "free_disk": remote_info['vg_free'],
8101
          "total_cpus": remote_info['cpu_total'],
8102
          "i_pri_memory": i_p_mem,
8103
          "i_pri_up_memory": i_p_up_mem,
8104
          }
8105
        pnr.update(pnr_dyn)
8106

    
8107
      node_results[nname] = pnr
8108
    data["nodes"] = node_results
8109

    
8110
    # instance data
8111
    instance_data = {}
8112
    for iinfo, beinfo in i_list:
8113
      nic_data = []
8114
      for nic in iinfo.nics:
8115
        filled_params = objects.FillDict(
8116
            cluster_info.nicparams[constants.PP_DEFAULT],
8117
            nic.nicparams)
8118
        nic_dict = {"mac": nic.mac,
8119
                    "ip": nic.ip,
8120
                    "mode": filled_params[constants.NIC_MODE],
8121
                    "link": filled_params[constants.NIC_LINK],
8122
                   }
8123
        if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
8124
          nic_dict["bridge"] = filled_params[constants.NIC_LINK]
8125
        nic_data.append(nic_dict)
8126
      pir = {
8127
        "tags": list(iinfo.GetTags()),
8128
        "admin_up": iinfo.admin_up,
8129
        "vcpus": beinfo[constants.BE_VCPUS],
8130
        "memory": beinfo[constants.BE_MEMORY],
8131
        "os": iinfo.os,
8132
        "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
8133
        "nics": nic_data,
8134
        "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
8135
        "disk_template": iinfo.disk_template,
8136
        "hypervisor": iinfo.hypervisor,
8137
        }
8138
      pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
8139
                                                 pir["disks"])
8140
      instance_data[iinfo.name] = pir
8141

    
8142
    data["instances"] = instance_data
8143

    
8144
    self.in_data = data
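
  # Illustrative sketch (not part of the original code; values are made up):
  # at this point self.in_data has roughly the following top-level shape:
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [],
  #    "enabled_hypervisors": ["xen-pvm"],
  #    "nodes": {"node1.example.com": {"total_memory": 4096, ...}},
  #    "instances": {"web1.example.com": {"memory": 512, ...}}}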
8145

    
8146
  def _AddNewInstance(self):
8147
    """Add new instance data to allocator structure.
8148

8149
    This in combination with _ComputeClusterData will create the
8150
    correct structure needed as input for the allocator.
8151

8152
    The checks for the completeness of the opcode must have already been
8153
    done.
8154

8155
    """
8156
    data = self.in_data
8157

    
8158
    disk_space = _ComputeDiskSize(self.disk_template, self.disks)
8159

    
8160
    if self.disk_template in constants.DTS_NET_MIRROR:
8161
      self.required_nodes = 2
8162
    else:
8163
      self.required_nodes = 1
8164
    request = {
8165
      "type": "allocate",
8166
      "name": self.name,
8167
      "disk_template": self.disk_template,
8168
      "tags": self.tags,
8169
      "os": self.os,
8170
      "vcpus": self.vcpus,
8171
      "memory": self.mem_size,
8172
      "disks": self.disks,
8173
      "disk_space_total": disk_space,
8174
      "nics": self.nics,
8175
      "required_nodes": self.required_nodes,
8176
      }
8177
    data["request"] = request
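
  # Illustrative sketch (not part of the original code; values are made up):
  # the resulting allocation request could look like
  #
  #   {"type": "allocate", "name": "web1.example.com",
  #    "disk_template": "drbd", "os": "debootstrap", "tags": [],
  #    "vcpus": 1, "memory": 512,
  #    "disks": [{"size": 1024, "mode": "rw"}],
  #    "required_nodes": 2}
  #
  # plus the "nics" list and the computed "disk_space_total" value.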
8178

    
8179
  def _AddRelocateInstance(self):
8180
    """Add relocate instance data to allocator structure.
8181

8182
    This in combination with _ComputeClusterData will create the
8183
    correct structure needed as input for the allocator.
8184

8185
    The checks for the completeness of the opcode must have already been
8186
    done.
8187

8188
    """
8189
    instance = self.cfg.GetInstanceInfo(self.name)
8190
    if instance is None:
8191
      raise errors.ProgrammerError("Unknown instance '%s' passed to"
8192
                                   " IAllocator" % self.name)
8193

    
8194
    if instance.disk_template not in constants.DTS_NET_MIRROR:
8195
      raise errors.OpPrereqError("Can't relocate non-mirrored instances")
8196

    
8197
    if len(instance.secondary_nodes) != 1:
8198
      raise errors.OpPrereqError("Instance does not have exactly one"
                                 " secondary node")
8199

    
8200
    self.required_nodes = 1
8201
    disk_sizes = [{'size': disk.size} for disk in instance.disks]
8202
    disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
8203

    
8204
    request = {
8205
      "type": "relocate",
8206
      "name": self.name,
8207
      "disk_space_total": disk_space,
8208
      "required_nodes": self.required_nodes,
8209
      "relocate_from": self.relocate_from,
8210
      }
8211
    self.in_data["request"] = request
8212

    
8213
  def _BuildInputData(self):
8214
    """Build input data structures.
8215

8216
    """
8217
    self._ComputeClusterData()
8218

    
8219
    if self.mode == constants.IALLOCATOR_MODE_ALLOC:
8220
      self._AddNewInstance()
8221
    else:
8222
      self._AddRelocateInstance()
8223

    
8224
    self.in_text = serializer.Dump(self.in_data)
8225

    
8226
  def Run(self, name, validate=True, call_fn=None):
8227
    """Run an instance allocator and return the results.
8228

8229
    """
8230
    if call_fn is None:
8231
      call_fn = self.rpc.call_iallocator_runner
8232

    
8233
    result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
8234
    result.Raise("Failure while running the iallocator script")
8235

    
8236
    self.out_text = result.payload
8237
    if validate:
8238
      self._ValidateResult()
8239

    
8240
  def _ValidateResult(self):
8241
    """Process the allocator results.
8242

8243
    This will process and if successful save the result in
8244
    self.out_data and the other parameters.
8245

8246
    """
8247
    try:
8248
      rdict = serializer.Load(self.out_text)
8249
    except Exception, err:
8250
      raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
8251

    
8252
    if not isinstance(rdict, dict):
8253
      raise errors.OpExecError("Can't parse iallocator results: not a dict")
8254

    
8255
    for key in "success", "info", "nodes":
8256
      if key not in rdict:
8257
        raise errors.OpExecError("Can't parse iallocator results:"
8258
                                 " missing key '%s'" % key)
8259
      setattr(self, key, rdict[key])
8260

    
8261
    if not isinstance(rdict["nodes"], list):
8262
      raise errors.OpExecError("Can't parse iallocator results: 'nodes' key"
8263
                               " is not a list")
8264
    self.out_data = rdict
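
# Illustrative sketch (not part of the original code): a well-formed
# iallocator reply that passes the validation above could be
#
#   {"success": True,
#    "info": "allocation successful",
#    "nodes": ["node2.example.com", "node3.example.com"]}
#
# after which self.success, self.info and self.nodes hold these values.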
8265

    
8266

    
8267
class LUTestAllocator(NoHooksLU):
8268
  """Run allocator tests.
8269

8270
  This LU runs the allocator tests
8271

8272
  """
8273
  _OP_REQP = ["direction", "mode", "name"]
8274

    
8275
  def CheckPrereq(self):
8276
    """Check prerequisites.
8277

8278
    This checks the opcode parameters depending on the direction and mode
    of the test.
8279

8280
    """
8281
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8282
      for attr in ["name", "mem_size", "disks", "disk_template",
8283
                   "os", "tags", "nics", "vcpus"]:
8284
        if not hasattr(self.op, attr):
8285
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
8286
                                     attr)
8287
      iname = self.cfg.ExpandInstanceName(self.op.name)
8288
      if iname is not None:
8289
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
8290
                                   iname)
8291
      if not isinstance(self.op.nics, list):
8292
        raise errors.OpPrereqError("Invalid parameter 'nics'")
8293
      for row in self.op.nics:
8294
        if (not isinstance(row, dict) or
8295
            "mac" not in row or
8296
            "ip" not in row or
8297
            "bridge" not in row):
8298
          raise errors.OpPrereqError("Invalid contents of the"
8299
                                     " 'nics' parameter")
8300
      if not isinstance(self.op.disks, list):
8301
        raise errors.OpPrereqError("Invalid parameter 'disks'")
8302
      for row in self.op.disks:
8303
        if (not isinstance(row, dict) or
8304
            "size" not in row or
8305
            not isinstance(row["size"], int) or
8306
            "mode" not in row or
8307
            row["mode"] not in ['r', 'w']):
8308
          raise errors.OpPrereqError("Invalid contents of the"
8309
                                     " 'disks' parameter")
8310
      if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
8311
        self.op.hypervisor = self.cfg.GetHypervisorType()
8312
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
8313
      if not hasattr(self.op, "name"):
8314
        raise errors.OpPrereqError("Missing attribute 'name' on opcode input")
8315
      fname = self.cfg.ExpandInstanceName(self.op.name)
8316
      if fname is None:
8317
        raise errors.OpPrereqError("Instance '%s' not found for relocation" %
8318
                                   self.op.name)
8319
      self.op.name = fname
8320
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
8321
    else:
8322
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
8323
                                 self.op.mode)
8324

    
8325
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
8326
      if not hasattr(self.op, "allocator") or self.op.allocator is None:
8327
        raise errors.OpPrereqError("Missing allocator name")
8328
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
8329
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
8330
                                 self.op.direction)
8331

    
8332
  def Exec(self, feedback_fn):
8333
    """Run the allocator test.
8334

8335
    """
8336
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
8337
      ial = IAllocator(self.cfg, self.rpc,
8338
                       mode=self.op.mode,
8339
                       name=self.op.name,
8340
                       mem_size=self.op.mem_size,
8341
                       disks=self.op.disks,
8342
                       disk_template=self.op.disk_template,
8343
                       os=self.op.os,
8344
                       tags=self.op.tags,
8345
                       nics=self.op.nics,
8346
                       vcpus=self.op.vcpus,
8347
                       hypervisor=self.op.hypervisor,
8348
                       )
8349
    else:
8350
      ial = IAllocator(self.cfg, self.rpc,
8351
                       mode=self.op.mode,
8352
                       name=self.op.name,
8353
                       relocate_from=list(self.relocate_from),
8354
                       )
8355

    
8356
    if self.op.direction == constants.IALLOCATOR_DIR_IN:
8357
      result = ial.in_text
8358
    else:
8359
      ial.Run(self.op.allocator, validate=False)
8360
      result = ial.out_text
8361
    return result